├── .cvsignore ├── .github └── workflows │ └── ccpp.yaml ├── .gitignore ├── .project ├── .travis.yml ├── README.md ├── appveyor.yml ├── msys_build.sh └── src ├── .cvsignore ├── AUTHORS ├── CMakeLists.txt ├── COPYING ├── INSTALL ├── Makefile.am ├── Makefile.mak ├── NEWS ├── README ├── bin ├── .cvsignore ├── Makefile.am ├── Makefile.mak └── hts_engine.c ├── config └── .cvsignore ├── configure.ac ├── hts_engine_API.pc.in ├── include └── HTS_engine.h └── lib ├── .cvsignore ├── HTS_audio.c ├── HTS_engine.c ├── HTS_gstream.c ├── HTS_hidden.h ├── HTS_label.c ├── HTS_misc.c ├── HTS_model.c ├── HTS_pstream.c ├── HTS_sstream.c ├── HTS_vocoder.c ├── Makefile.am └── Makefile.mak /.cvsignore: -------------------------------------------------------------------------------- 1 | memo.txt 2 | -------------------------------------------------------------------------------- /.github/workflows/ccpp.yaml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | strategy: 12 | matrix: 13 | build_type: [ Release ] 14 | config: 15 | - os: ubuntu-latest 16 | cc: gcc 17 | cxx: g++ 18 | - os: ubuntu-latest 19 | cc: clang 20 | cxx: clang++ 21 | - os: macos-latest 22 | cc: gcc 23 | cxx: g++ 24 | - os: macos-latest 25 | cc: clang 26 | cxx: clang++ 27 | - os: windows-latest 28 | cc: cl 29 | cxx: cl 30 | 31 | env: 32 | CC: ${{ matrix.config.cc }} 33 | CXX: ${{ matrix.config.cxx }} 34 | 35 | runs-on: ${{ matrix.config.os }} 36 | 37 | steps: 38 | - uses: actions/checkout@v2 39 | - name: Build 40 | working-directory: src 41 | run: | 42 | cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=ON -S . 
-B build 43 | cmake --build build --config ${{ matrix.build_type }} 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io 2 | 3 | ### Emacs ### 4 | # -*- mode: gitignore; -*- 5 | *~ 6 | \#*\# 7 | /.emacs.desktop 8 | /.emacs.desktop.lock 9 | *.elc 10 | auto-save-list 11 | tramp 12 | .\#* 13 | 14 | # Org-mode 15 | .org-id-locations 16 | *_archive 17 | 18 | # flymake-mode 19 | *_flymake.* 20 | 21 | # eshell files 22 | /eshell/history 23 | /eshell/lastdir 24 | 25 | # elpa packages 26 | /elpa/ 27 | 28 | # reftex files 29 | *.rel 30 | 31 | # AUCTeX auto folder 32 | /auto/ 33 | 34 | # cask packages 35 | .cask/ 36 | 37 | 38 | ### vim ### 39 | [._]*.s[a-w][a-z] 40 | [._]s[a-w][a-z] 41 | *.un~ 42 | Session.vim 43 | .netrwhist 44 | *~ 45 | 46 | 47 | ### C++ ### 48 | # Compiled Object files 49 | *.slo 50 | *.lo 51 | *.o 52 | *.obj 53 | 54 | # Precompiled Headers 55 | *.gch 56 | *.pch 57 | 58 | # Compiled Dynamic libraries 59 | *.so 60 | *.dylib 61 | *.dll 62 | 63 | # Fortran module files 64 | *.mod 65 | 66 | # Compiled Static libraries 67 | *.lai 68 | *.la 69 | *.a 70 | *.lib 71 | 72 | # Executables 73 | *.exe 74 | *.out 75 | *.app 76 | 77 | 78 | ### Autotools ### 79 | # http://www.gnu.org/software/automake 80 | 81 | Makefile.in 82 | 83 | # http://www.gnu.org/software/autoconf 84 | 85 | /autom4te.cache 86 | /aclocal.m4 87 | /compile 88 | /configure 89 | /depcomp 90 | /install-sh 91 | /missing 92 | /stamp-h1 93 | 94 | 95 | ### CMake ### 96 | CMakeCache.txt 97 | CMakeFiles 98 | Makefile 99 | cmake_install.cmake 100 | install_manifest.txt 101 | 102 | 103 | ### Linux ### 104 | *~ 105 | 106 | # KDE directory preferences 107 | .directory 108 | 109 | 110 | ### OSX ### 111 | .DS_Store 112 | .AppleDouble 113 | .LSOverride 114 | 115 | # Icon must end with two \r 116 | Icon 117 | 118 | 119 | # Thumbnails 120 | ._* 121 | 
122 | # Files that might appear on external disk 123 | .Spotlight-V100 124 | .Trashes 125 | 126 | # Directories potentially created on remote AFP share 127 | .AppleDB 128 | .AppleDesktop 129 | Network Trash Folder 130 | Temporary Items 131 | .apdisk 132 | 133 | 134 | ### Windows ### 135 | # Windows image file caches 136 | Thumbs.db 137 | ehthumbs.db 138 | 139 | # Folder config file 140 | Desktop.ini 141 | 142 | # Recycle Bin used on file shares 143 | $RECYCLE.BIN/ 144 | 145 | # Windows Installer files 146 | *.cab 147 | *.msi 148 | *.msm 149 | *.msp 150 | 151 | # Windows shortcuts 152 | *.lnk 153 | 154 | # manually added 155 | .waf* 156 | .dropbox 157 | .lock-* 158 | *.lib 159 | *.log 160 | build 161 | Makefile 162 | .deps/ 163 | config.status -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | sourceforge_hts_engine_API 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | # Ubuntu 18.04 https://docs.travis-ci.com/user/reference/bionic 4 | # NOTE: this project requires cmake >= 3.11 5 | dist: bionic 6 | 7 | compiler: 8 | - gcc 9 | - clang 10 | 11 | # Just check if build success or not for now 12 | script: 13 | - cd src 14 | - mkdir -p build && cd build 15 | - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON .. 
16 | - make -j 17 | 18 | notifications: 19 | email: false 20 | 21 | os: 22 | - linux 23 | - osx 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hts_engine_API 2 | 3 | 4 | ![C/C++ CI](https://github.com/r9y9/hts_engine_API/workflows/C/C++%20CI/badge.svg) 5 | [![Build Status](https://travis-ci.org/r9y9/hts_engine_API.svg?branch=master)](https://travis-ci.org/r9y9/hts_engine_API) 6 | [![Build status](https://ci.appveyor.com/api/projects/status/7tm96g50a9i43mhl/branch/master?svg=true)](https://ci.appveyor.com/project/r9y9/hts-engine-api/branch/master) 7 | 8 | A fork of hts_engine_API 9 | 10 | ## Why 11 | 12 | Wanted to fork it with *git*. 13 | 14 | **NOTE**: To preserve history of cvs version of hts_engine_API, this fork was originally created by: 15 | 16 | ``` 17 | git cvsimport -v \ 18 | -d :pserver:anonymous@hts-engine.cvs.sourceforge.net:/cvsroot/hts-engine \ 19 | -C hts_engine_API hts_engine_API 20 | ``` 21 | 22 | ## Supported platforms 23 | 24 | - Linux 25 | - Mac OS X 26 | - Windows (gcc/msvc) 27 | 28 | ## Changes 29 | 30 | The important changes from the original hts_engine_API are summarized below: 31 | 32 | - CMake support 33 | - Add pkg-config support 34 | - Continuous integration support 35 | - keep semantic versioning http://semver.org/ 36 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | PYTHON: "C:/Python36" 3 | matrix: 4 | # NOTE: we may want to revert this back if needed 5 | # - COMPILER: gcc 6 | # ARCH: "i686" 7 | # 8 | # - COMPILER: gcc 9 | # ARCH: "x86_64" 10 | 11 | - COMPILER: msvc 12 | ARCH: "i686" 13 | 14 | - COMPILER: msvc 15 | ARCH: "x86_64" 16 | 17 | init: 18 | - "ECHO %PYTHON%" 19 | - ps: "ls C:/Python*" 20 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" 21 
| 22 | cache: 23 | - mingw-w32-bin-i686-20200211.7z 24 | - mingw-w64-bin-x86_64-20200211.7z 25 | 26 | skip_commits: 27 | # Add [av skip] to commit messages for docfixes, etc to reduce load on queue 28 | message: /\[av skip\]/ 29 | 30 | notifications: 31 | - provider: Email 32 | on_build_success: false 33 | on_build_failure: false 34 | on_build_status_changed: false 35 | 36 | build_script: 37 | - C:\MinGW\msys\1.0\bin\sh --login /c/projects/hts-engine-api/msys_build.sh 38 | 39 | artifacts: 40 | - path: '**\*.dll' 41 | name: hts_engine_API 42 | -------------------------------------------------------------------------------- /msys_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | if [ "$ARCH" = x86_64 ]; then 6 | bits=64 7 | else 8 | bits=32 9 | fi 10 | 11 | # Use this mingw instead of the pre-installed mingw on Appveyor 12 | if [ "$COMPILER" = gcc ]; then 13 | f=mingw-w$bits-bin-$ARCH-20200211.7z 14 | if ! [ -e $f ]; then 15 | echo "Downloading $f" 16 | curl -LsSO https://sourceforge.net/projects/mingw-w64-dgn/files/mingw-w64/$f 17 | fi 18 | 7z x $f > /dev/null 19 | export PATH=$PWD/mingw$bits/bin:$PATH 20 | export CC=$PWD/mingw$bits/bin/gcc 21 | fi 22 | 23 | # Build 24 | cd /c/projects/hts-engine-api/src 25 | mkdir -p build && cd build 26 | # NOTE: it seems this does not work as expected... 27 | if [ "$COMPILER" = gcc ]; then 28 | cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. 29 | else 30 | cmake .. 31 | fi 32 | cmake --build . 
--config Release -------------------------------------------------------------------------------- /src/.cvsignore: -------------------------------------------------------------------------------- 1 | autom4te.cache 2 | aclocal.m4 3 | config.status 4 | cvs2cl.pl 5 | Makefile 6 | Makefile.in 7 | configure 8 | format.sh 9 | makeclean.sh 10 | test_mcp.sh 11 | test_mcp 12 | conf.sh 13 | *test* 14 | config.log 15 | hts_engine_API-0.99.tar.* 16 | *.bz2 17 | -------------------------------------------------------------------------------- /src/AUTHORS: -------------------------------------------------------------------------------- 1 | The hts_engine API is software to synthesize speech waveform from HMMs trained 2 | by the HMM-based speech synthesis system (HTS). This software is released 3 | under the Modified BSD license. See the COPYING file in the same directory as 4 | this file for the license. 5 | 6 | The hts_engine_API has been developed by several members of HTS working group 7 | and some graduate students in Nagoya Institute of Technology: 8 | 9 | Keiichi Tokuda http://www.sp.nitech.ac.jp/~tokuda/ 10 | (Produce and Design) 11 | Keiichiro Oura http://www.sp.nitech.ac.jp/~uratec/ 12 | (Design and Development, Main Maintainer) 13 | Heiga Zen 14 | Shinji Sako http://www.mmsp.nitech.ac.jp/~sako/ 15 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | # The name "HTSEngine" is used in the original makefile 4 | # Due to historical reasons, let me use "hts_engine_API" instead first. 5 | # we may want to use "HTSEngine" in the future. 6 | # NOTE: we should use semantic versioning. 1.09 -> 1.0.9! 
7 | project(hts_engine_API) 8 | 9 | set(PROJECT_VER_MAJOR 1) 10 | set(PROJECT_VER_MINOR 0) 11 | set(PROJECT_VER_PATCH 9) 12 | set(PROJECT_VER "${PROJECT_VER_MAJOR}.${PROJECT_VER_MINOR}.${PROJECT_VER_PATCH}") 13 | set(PROJECT_APIVER "${PROJECT_VER_MAJOR}.${PROJECT_VER_MINOR}") 14 | 15 | option(AUDIO_PLAY_NONE "Disable audio playing support" ON) 16 | 17 | # NOTE: we can use portaudio or win32 if we really want. 18 | # I disable it by default assuming most people don't want audio playing functionality 19 | if (${AUDIO_PLAY_NONE}) 20 | add_compile_options("-D AUDIO_PLAY_NONE") 21 | endif () 22 | 23 | # set the default path for built executables to the "bin" directory 24 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin) 25 | 26 | # set the default path for built libraries to the "lib" directory 27 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) 28 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) 29 | 30 | configure_file(hts_engine_API.pc.in "${PROJECT_BINARY_DIR}/hts_engine_API.pc" @ONLY) 31 | 32 | file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/lib/*.c) 33 | file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/lib/*.h ${PROJECT_SOURCE_DIR}/include/*.h) 34 | 35 | include_directories(${PROJECT_SOURCE_DIR}/include) 36 | 37 | # The hts_engine_API library 38 | add_library(${PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES}) 39 | set_target_properties(${PROJECT_NAME} PROPERTIES 40 | VERSION ${PROJECT_VER} 41 | SOVERSION ${PROJECT_APIVER} 42 | ) 43 | 44 | # hts_engine binary 45 | add_executable(hts_engine bin/hts_engine.c) 46 | target_link_libraries(hts_engine hts_engine_API) 47 | 48 | if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") 49 | target_link_libraries(${PROJECT_NAME} winmm) 50 | target_link_libraries(hts_engine winmm) 51 | else() 52 | target_link_libraries(hts_engine m) 53 | endif() 54 | 55 | install(TARGETS ${PROJECT_NAME} hts_engine DESTINATION lib RUNTIME DESTINATION bin) 56 | 
install(FILES include/HTS_engine.h DESTINATION include) 57 | install(FILES "${PROJECT_BINARY_DIR}/hts_engine_API.pc" DESTINATION lib/pkgconfig/) -------------------------------------------------------------------------------- /src/COPYING: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. 
*/ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | -------------------------------------------------------------------------------- /src/INSTALL: -------------------------------------------------------------------------------- 1 | Installation Instructions 2 | ************************* 3 | 4 | 1. Cd to ./src directory. 5 | 6 | % cd src 7 | 8 | 2. Create "build" directory and cd to ./src/build. 9 | 10 | % mkdir -p build && cd build 11 | 12 | 3. Run "cmake" to create Makefile and "make" to compile. 13 | 14 | % cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON .. 15 | % make -j 16 | 17 | 4. Install library and binary. 
18 | 19 | % make install -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | EXTRA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README Makefile.mak 3 | 4 | SUBDIRS = lib bin 5 | 6 | include_HEADERS = include/HTS_engine.h 7 | 8 | DISTCLEANFILES = *.log *.out *~ config/*~ include/*~ 9 | 10 | MAINTAINERCLEANFILES = aclocal.m4 configure Makefile.in config/compile \ 11 | config/config.guess config/config.sub config/depcomp \ 12 | config/install-sh config/missing 13 | -------------------------------------------------------------------------------- /src/Makefile.mak: -------------------------------------------------------------------------------- 1 | 2 | INSTALLDIR = C:\hts_engine_API 3 | 4 | all: 5 | cd lib 6 | nmake /f Makefile.mak 7 | cd .. 8 | cd bin 9 | nmake /f Makefile.mak 10 | cd .. 11 | 12 | clean: 13 | cd lib 14 | nmake /f Makefile.mak clean 15 | cd .. 16 | cd bin 17 | nmake /f Makefile.mak clean 18 | cd .. 19 | 20 | install:: 21 | @if not exist "$(INSTALLDIR)\lib" mkdir "$(INSTALLDIR)\lib" 22 | cd lib 23 | copy *.lib $(INSTALLDIR)\lib 24 | cd .. 25 | @if not exist "$(INSTALLDIR)\bin" mkdir "$(INSTALLDIR)\bin" 26 | cd bin 27 | copy *.exe $(INSTALLDIR)\bin 28 | cd .. 29 | @if not exist "$(INSTALLDIR)\include" mkdir "$(INSTALLDIR)\include" 30 | cd include 31 | copy *.h $(INSTALLDIR)\include 32 | cd .. 33 | -------------------------------------------------------------------------------- /src/NEWS: -------------------------------------------------------------------------------- 1 | Version 1.09: 2 | * add '-g' option to change volume. 3 | * add some functions to get fullcontext label format and version defined in HTS voice. 4 | * support 64-bit Windows audio. 5 | * bug fixes. 6 | 7 | Version 1.08: 8 | * support 64-bit machine. 9 | * re-write excitation function. 
10 | * add some functions to get generated parameters and number of frames. 11 | * add some functions for separating synthesis step. 12 | * bug fixes. 13 | 14 | Version 1.07: 15 | * support new HTS voice format. 16 | * add LSP postfilter. 17 | * change volume unit to DB. 18 | * add function to get generated speech. 19 | * bug fixes. 20 | 21 | Version 1.06: 22 | * modify state duration calculation algorithm. 23 | * change many function from void to boolean. 24 | * change source format. 25 | * bug fixes. 26 | 27 | Version 1.05: 28 | * support PortAudio for audio output. 29 | 30 | Version 1.04: 31 | * support C++ compiler. 32 | * add low-pass filter stream. 33 | * change definition of GV weights. 34 | * add stop switch. 35 | * add volume controller. 36 | * bug fixes. 37 | 38 | Version 1.03: 39 | * add '-z' option to control audio buffer size. 40 | * change PDF file format. 41 | * bug fixes. 42 | 43 | Version 1.02: 44 | * context-dependent GV without silent and pause phoneme. 45 | * buffer size control of audio output for Windows. 46 | * makefiles for nmake of VC. 47 | * save detail information. 48 | * support singing voice synthesis. 49 | * bug fixes. 50 | 51 | Version 1.01: 52 | * bug fixes. 53 | * transfer site to SourceForge. 54 | 55 | Version 1.00: 56 | * bug fixes and performance improvements. 57 | * support linear gain in addition to log gain for LSP-type parameters. 58 | * first stable release. 59 | 60 | Version 0.99: 61 | * bug fixes. 62 | * switch license to the New and Simplified BSD license. 63 | 64 | Version 0.96: 65 | * bug fixes. 66 | * support flexible model structure. 67 | * support LSP-type parameters. 68 | 69 | Version 0.95: 70 | * support GV. 71 | 72 | Version 0.9: 73 | * implement API-style hts_engine. 74 | * support speaker-interpolation. 
75 | -------------------------------------------------------------------------------- /src/README: -------------------------------------------------------------------------------- 1 | =============================================================================== 2 | The HMM-Based Speech Synthesis Engine "hts_engine API" version 1.09 3 | release December 25, 2014 4 | 5 | 6 | The hts_engine API is an API version of hts_engine which has been released 7 | since HTS version 1.1. It has been being developed by the HTS working group 8 | (see "Who we are" below) and some graduate students in Nagoya Institute of 9 | Technology (see "AUTHORS" in the same directory). 10 | 11 | ******************************************************************************* 12 | Copying 13 | ******************************************************************************* 14 | 15 | The hts_engine API is released under the Modified BSD license (see 16 | http://www.opensource.org/). Using and distributing this software is free 17 | (without restriction including without limitation the rights to use, copy, 18 | modify, merge, publish, distribute, sublicense, and/or sell copies of this 19 | work, and to permit persons to whom this work is furnished to do so) subject to 20 | the conditions in the following license: 21 | 22 | /* ----------------------------------------------------------------- */ 23 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 24 | /* developed by HTS Working Group */ 25 | /* http://hts-engine.sourceforge.net/ */ 26 | /* ----------------------------------------------------------------- */ 27 | /* */ 28 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 29 | /* Department of Computer Science */ 30 | /* */ 31 | /* 2001-2008 Tokyo Institute of Technology */ 32 | /* Interdisciplinary Graduate School of */ 33 | /* Science and Engineering */ 34 | /* */ 35 | /* All rights reserved. 
*/ 36 | /* */ 37 | /* Redistribution and use in source and binary forms, with or */ 38 | /* without modification, are permitted provided that the following */ 39 | /* conditions are met: */ 40 | /* */ 41 | /* - Redistributions of source code must retain the above copyright */ 42 | /* notice, this list of conditions and the following disclaimer. */ 43 | /* - Redistributions in binary form must reproduce the above */ 44 | /* copyright notice, this list of conditions and the following */ 45 | /* disclaimer in the documentation and/or other materials provided */ 46 | /* with the distribution. */ 47 | /* - Neither the name of the HTS working group nor the names of its */ 48 | /* contributors may be used to endorse or promote products derived */ 49 | /* from this software without specific prior written permission. */ 50 | /* */ 51 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 52 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 53 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 54 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 55 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 56 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 57 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 58 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 59 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 60 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 61 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 62 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 63 | /* POSSIBILITY OF SUCH DAMAGE. */ 64 | /* ----------------------------------------------------------------- */ 65 | 66 | Although this software is free, we still offer no warranties and no 67 | maintenance. 
We will continue to endeavor to fix bugs and answer queries when 68 | can, but are not in a position to guarantee it. We will consider consultancy if 69 | desired, please contacts us for details. 70 | 71 | If you are using the hts_engine API in commercial environments, even though no 72 | license is required, we would be grateful if you let us know as it helps 73 | justify ourselves to our various sponsors. We also strongly encourage you to 74 | 75 | * refer to the use of hts_engine API in any publications that use this 76 | software 77 | * report bugs, where possible with bug fixes, that are found 78 | 79 | See also "COPYING" file in the current directory for details. 80 | 81 | ******************************************************************************* 82 | Installation 83 | ******************************************************************************* 84 | 85 | See "INSTALL" in the same directory for details. 86 | 87 | ******************************************************************************* 88 | Documentation 89 | ******************************************************************************* 90 | 91 | Reference manual of hts_engine API is available at 92 | 93 | http://hts-engine.sourceforge.net/ 94 | 95 | ******************************************************************************* 96 | Acknowledgements 97 | ******************************************************************************* 98 | 99 | Keiichi Tokuda 100 | Shinji Sako 101 | Heiga Zen 102 | Keiichiro Oura 103 | Kazuhiro Nakamura 104 | Keijiro Saino 105 | 106 | ******************************************************************************* 107 | Who we are 108 | ******************************************************************************* 109 | 110 | The HTS working group is a voluntary group for developing the HMM-Based Speech 111 | Synthesis System. 
Current members are 112 | 113 | Keiichi Tokuda http://www.sp.nitech.ac.jp/~tokuda/ 114 | (Produce and Design) 115 | Keiichiro Oura http://www.sp.nitech.ac.jp/~uratec/ 116 | (Design and Development, Main Maintainer) 117 | Kei Hashimoto http://www.sp.nitech.ac.jp/~bonanza/ 118 | Sayaka Shiota http://www.sp.nitech.ac.jp/~sayaka/ 119 | Shinji Takaki http://www.sp.nitech.ac.jp/~k-prr44/ 120 | Heiga Zen 121 | Junichi Yamagishi http://homepages.inf.ed.ac.uk/jyamagis/ 122 | Tomoki Toda http://spalab.naist.jp/~tomoki/index_e.html 123 | Takashi Nose 124 | Shinji Sako http://www.mmsp.nitech.ac.jp/~sako/ 125 | Alan W. Black http://www.cs.cmu.edu/~awb/ 126 | 127 | and the members are dynamically changing. The current formal contact address of 128 | HTS working group and a mailing list for HTS users can be found at 129 | http://hts.sp.nitech.ac.jp/ 130 | =============================================================================== 131 | -------------------------------------------------------------------------------- /src/bin/.cvsignore: -------------------------------------------------------------------------------- 1 | Makefile.in 2 | hts_engine 3 | Makefile 4 | .deps 5 | -------------------------------------------------------------------------------- /src/bin/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | EXTRA_DIST = Makefile.mak 3 | 4 | AM_CPPFLAGS = -I @top_srcdir@/include 5 | 6 | bin_PROGRAMS = hts_engine 7 | 8 | hts_engine_SOURCES = hts_engine.c 9 | 10 | hts_engine_LDADD = ../lib/libHTSEngine.a 11 | 12 | DISTCLEANFILES = *.log *.out *~ 13 | 14 | MAINTAINERCLEANFILES = Makefile.in 15 | -------------------------------------------------------------------------------- /src/bin/Makefile.mak: -------------------------------------------------------------------------------- 1 | 2 | CC = cl 3 | CL = link 4 | 5 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include 6 | LFLAGS = /LTCG 7 | 8 | LIBS = 
..\lib\hts_engine_API.lib winmm.lib 9 | 10 | all: hts_engine.exe 11 | 12 | hts_engine.exe : hts_engine.obj 13 | $(CC) $(CFLAGS) /c $(@B).c 14 | $(CL) $(LFLAGS) /OUT:$@ $(LIBS) $(@B).obj 15 | 16 | clean: 17 | del *.exe 18 | del *.obj 19 | -------------------------------------------------------------------------------- /src/bin/hts_engine.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. 
*/ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_ENGINE_C 46 | #define HTS_ENGINE_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_ENGINE_C_START extern "C" { 50 | #define HTS_ENGINE_C_END } 51 | #else 52 | #define HTS_ENGINE_C_START 53 | #define HTS_ENGINE_C_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_ENGINE_C_START; 57 | 58 | #include 59 | 60 | #include "HTS_engine.h" 61 | 62 | /* usage: output usage */ 63 | void usage(void) 64 | { 65 | fprintf(stderr, "%s\n", HTS_COPYRIGHT); 66 | fprintf(stderr, "hts_engine - The HMM-based speech synthesis engine \"hts_engine API\"\n"); 67 | fprintf(stderr, "\n"); 68 | fprintf(stderr, " usage:\n"); 69 | fprintf(stderr, " hts_engine [ options ] [ infile ]\n"); 70 | fprintf(stderr, " options: [ def][ min-- max]\n"); 71 | fprintf(stderr, " -m htsvoice : HTS voice files [ N/A]\n"); 72 | fprintf(stderr, " -od s : filename of output label with duration [ N/A]\n"); 73 | fprintf(stderr, " -om s : filename of output spectrum [ N/A]\n"); 74 | fprintf(stderr, " -of s : filename of output log 
F0 [ N/A]\n"); 75 | fprintf(stderr, " -ol s : filename of output low-pass filter [ N/A]\n"); 76 | fprintf(stderr, " -or s : filename of output raw audio (generated speech) [ N/A]\n"); 77 | fprintf(stderr, " -ow s : filename of output wav audio (generated speech) [ N/A]\n"); 78 | fprintf(stderr, " -ot s : filename of output trace information [ N/A]\n"); 79 | fprintf(stderr, " -vp : use phoneme alignment for duration [ N/A]\n"); 80 | fprintf(stderr, " -i i f1 .. fi : enable interpolation & specify number(i),coefficient(f) [ N/A]\n"); 81 | fprintf(stderr, " -s i : sampling frequency [ auto][ 1-- ]\n"); 82 | fprintf(stderr, " -p i : frame period (point) [ auto][ 1-- ]\n"); 83 | fprintf(stderr, " -a f : all-pass constant [ auto][ 0.0-- 1.0]\n"); 84 | fprintf(stderr, " -b f : postfiltering coefficient [ 0.0][ 0.0-- 1.0]\n"); 85 | fprintf(stderr, " -r f : speech speed rate [ 1.0][ 0.0-- ]\n"); 86 | fprintf(stderr, " -fm f : additional half-tone [ 0.0][ -- ]\n"); 87 | fprintf(stderr, " -u f : voiced/unvoiced threshold [ 0.5][ 0.0-- 1.0]\n"); 88 | fprintf(stderr, " -jm f : weight of GV for spectrum [ 1.0][ 0.0-- ]\n"); 89 | fprintf(stderr, " -jf f : weight of GV for log F0 [ 1.0][ 0.0-- ]\n"); 90 | fprintf(stderr, " -g f : volume (dB) [ 0.0][ -- ]\n"); 91 | fprintf(stderr, " -z i : audio buffer size (if i==0, turn off) [ 0][ 0-- ]\n"); 92 | fprintf(stderr, " infile:\n"); 93 | fprintf(stderr, " label file\n"); 94 | fprintf(stderr, " note:\n"); 95 | fprintf(stderr, " generated spectrum, log F0, and low-pass filter coefficient\n"); 96 | fprintf(stderr, " sequences are saved in natural endian, binary (float) format.\n"); 97 | fprintf(stderr, "\n"); 98 | 99 | exit(0); 100 | } 101 | 102 | int main(int argc, char **argv) 103 | { 104 | int i; 105 | double f; 106 | 107 | /* hts_engine API */ 108 | HTS_Engine engine; 109 | 110 | /* HTS voices */ 111 | size_t num_voices; 112 | char **fn_voices; 113 | 114 | /* input label file name */ 115 | char *labfn = NULL; 116 | 117 | /* output 
file pointers */ 118 | FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL; 119 | 120 | /* interpolation weights */ 121 | size_t num_interpolation_weights; 122 | 123 | /* output usage */ 124 | if (argc <= 1) 125 | usage(); 126 | 127 | /* initialize hts_engine API */ 128 | HTS_Engine_initialize(&engine); 129 | 130 | /* get HTS voice file names */ 131 | num_voices = 0; 132 | fn_voices = (char **) malloc(argc * sizeof(char *)); 133 | for (i = 0; i < argc; i++) { 134 | if (argv[i][0] == '-' && argv[i][1] == 'm') 135 | fn_voices[num_voices++] = argv[++i]; 136 | if (argv[i][0] == '-' && argv[i][1] == 'h') 137 | usage(); 138 | } 139 | if (num_voices == 0) { 140 | fprintf(stderr, "Error: HTS voice must be specified.\n"); 141 | free(fn_voices); 142 | exit(1); 143 | } 144 | 145 | /* load HTS voices */ 146 | if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) { 147 | fprintf(stderr, "Error: HTS voices cannot be loaded.\n"); 148 | free(fn_voices); 149 | HTS_Engine_clear(&engine); 150 | exit(1); 151 | } 152 | free(fn_voices); 153 | 154 | /* get options */ 155 | while (--argc) { 156 | if (**++argv == '-') { 157 | switch (*(*argv + 1)) { 158 | case 'v': 159 | switch (*(*argv + 2)) { 160 | case 'p': 161 | HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE); 162 | break; 163 | default: 164 | fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2)); 165 | HTS_Engine_clear(&engine); 166 | exit(1); 167 | } 168 | break; 169 | case 'o': 170 | switch (*(*argv + 2)) { 171 | case 'w': 172 | wavfp = fopen(*++argv, "wb"); 173 | break; 174 | case 'r': 175 | rawfp = fopen(*++argv, "wb"); 176 | break; 177 | case 'd': 178 | durfp = fopen(*++argv, "wt"); 179 | break; 180 | case 'm': 181 | mgcfp = fopen(*++argv, "wb"); 182 | break; 183 | case 'f': 184 | case 'p': 185 | lf0fp = fopen(*++argv, "wb"); 186 | break; 187 | case 'l': 188 | lpffp = fopen(*++argv, "wb"); 189 | break; 190 | case 't': 191 | tracefp = fopen(*++argv, 
"wt"); 192 | break; 193 | default: 194 | fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2)); 195 | HTS_Engine_clear(&engine); 196 | exit(1); 197 | } 198 | --argc; 199 | break; 200 | case 'h': 201 | usage(); 202 | break; 203 | case 'm': 204 | argv++; /* HTS voices were already loaded */ 205 | --argc; 206 | break; 207 | case 's': 208 | HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv)); 209 | --argc; 210 | break; 211 | case 'p': 212 | HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv)); 213 | --argc; 214 | break; 215 | case 'a': 216 | HTS_Engine_set_alpha(&engine, atof(*++argv)); 217 | --argc; 218 | break; 219 | case 'b': 220 | HTS_Engine_set_beta(&engine, atof(*++argv)); 221 | --argc; 222 | break; 223 | case 'r': 224 | HTS_Engine_set_speed(&engine, atof(*++argv)); 225 | --argc; 226 | break; 227 | case 'f': 228 | switch (*(*argv + 2)) { 229 | case 'm': 230 | HTS_Engine_add_half_tone(&engine, atof(*++argv)); 231 | break; 232 | default: 233 | fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2)); 234 | HTS_Engine_clear(&engine); 235 | exit(1); 236 | } 237 | --argc; 238 | break; 239 | case 'u': 240 | HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv)); 241 | --argc; 242 | break; 243 | case 'i': 244 | num_interpolation_weights = atoi(*++argv); 245 | argc--; 246 | if (num_interpolation_weights != num_voices) { 247 | HTS_Engine_clear(&engine); 248 | exit(1); 249 | } 250 | for (i = 0; i < num_interpolation_weights; i++) { 251 | f = atof(*++argv); 252 | argc--; 253 | HTS_Engine_set_duration_interpolation_weight(&engine, i, f); 254 | HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f); 255 | HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f); 256 | HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f); 257 | HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f); 258 | } 259 | break; 260 | case 'j': 261 | switch (*(*argv + 2)) { 262 | case 'm': 263 | HTS_Engine_set_gv_weight(&engine, 0, 
atof(*++argv)); 264 | break; 265 | case 'f': 266 | case 'p': 267 | HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv)); 268 | break; 269 | default: 270 | fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2)); 271 | HTS_Engine_clear(&engine); 272 | exit(1); 273 | } 274 | --argc; 275 | break; 276 | case 'g': 277 | HTS_Engine_set_volume(&engine, atof(*++argv)); 278 | --argc; 279 | break; 280 | case 'z': 281 | HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv)); 282 | --argc; 283 | break; 284 | default: 285 | fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1)); 286 | HTS_Engine_clear(&engine); 287 | exit(1); 288 | } 289 | } else { 290 | labfn = *argv; 291 | } 292 | } 293 | 294 | /* synthesize */ 295 | if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) { 296 | fprintf(stderr, "Error: waveform cannot be synthesized.\n"); 297 | HTS_Engine_clear(&engine); 298 | exit(1); 299 | } 300 | 301 | /* output */ 302 | if (tracefp != NULL) 303 | HTS_Engine_save_information(&engine, tracefp); 304 | if (durfp != NULL) 305 | HTS_Engine_save_label(&engine, durfp); 306 | if (rawfp) 307 | HTS_Engine_save_generated_speech(&engine, rawfp); 308 | if (wavfp) 309 | HTS_Engine_save_riff(&engine, wavfp); 310 | if (mgcfp) 311 | HTS_Engine_save_generated_parameter(&engine, 0, mgcfp); 312 | if (lf0fp) 313 | HTS_Engine_save_generated_parameter(&engine, 1, lf0fp); 314 | if (lpffp) 315 | HTS_Engine_save_generated_parameter(&engine, 2, lpffp); 316 | 317 | /* reset */ 318 | HTS_Engine_refresh(&engine); 319 | 320 | /* free memory */ 321 | HTS_Engine_clear(&engine); 322 | 323 | /* close files */ 324 | if (durfp != NULL) 325 | fclose(durfp); 326 | if (mgcfp != NULL) 327 | fclose(mgcfp); 328 | if (lf0fp != NULL) 329 | fclose(lf0fp); 330 | if (lpffp != NULL) 331 | fclose(lpffp); 332 | if (wavfp != NULL) 333 | fclose(wavfp); 334 | if (rawfp != NULL) 335 | fclose(rawfp); 336 | if (tracefp != NULL) 337 | fclose(tracefp); 338 | 339 | return 0; 340 | } 341 | 342 | 
HTS_ENGINE_C_END; 343 | 344 | #endif /* !HTS_ENGINE_C */ 345 | -------------------------------------------------------------------------------- /src/config/.cvsignore: -------------------------------------------------------------------------------- 1 | depcomp 2 | install-sh 3 | missing 4 | config.guess 5 | config.sub 6 | -------------------------------------------------------------------------------- /src/configure.ac: -------------------------------------------------------------------------------- 1 | # -*- Autoconf -*- 2 | # Process this file with autoconf to produce a configure script. 3 | 4 | AC_PREREQ(2.59) 5 | AC_INIT(hts_engine_API, 1.09, hts-engine-users@lists.sourceforge.net, hts_engine_API) 6 | AC_CONFIG_AUX_DIR([config]) 7 | AC_COPYRIGHT(Copyright 2001-2014 Nagoya Institute of Technology) 8 | AC_COPYRIGHT(Copyright 2001-2008 Tokyo Institute of Technology) 9 | AM_INIT_AUTOMAKE 10 | 11 | # Checks for C compiler 12 | AC_PROG_CC 13 | AM_PROG_CC_C_O 14 | AC_PROG_INSTALL 15 | AC_PROG_RANLIB 16 | AN_MAKEVAR([AR], [AC_PROG_AR]) 17 | AN_PROGRAM([ar], [AC_PROG_AR]) 18 | AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) 19 | AC_PROG_AR 20 | 21 | 22 | # Checks for libraries. 23 | AC_CHECK_LIB([m], [log]) 24 | 25 | 26 | # Checks for header files. 27 | AC_HEADER_STDC 28 | AC_CHECK_HEADERS([stdlib.h string.h]) 29 | 30 | 31 | # Checks for typedefs, structures, and compiler characteristics. 32 | AC_C_CONST 33 | AC_TYPE_SIZE_T 34 | 35 | # Checks for library functions. 
36 | AC_FUNC_VPRINTF 37 | AC_CHECK_FUNCS([sqrt strchr strrchr strstr]) 38 | 39 | 40 | # Checks for embedded device compile 41 | AC_ARG_ENABLE(embedded, [ --enable-embedded turn on compiling for embedded devices (default=no)],,enable_embedded=no) 42 | AC_MSG_CHECKING(whether to enable compiling for embedded devices) 43 | if test x$enable_embedded = xyes; then 44 | AC_MSG_RESULT(yes) 45 | AC_DEFINE(HTS_EMBEDDED) 46 | else 47 | AC_MSG_RESULT(no) 48 | fi 49 | 50 | 51 | # Checks for using festival 52 | AC_ARG_ENABLE(festival, [ --enable-festival use memory allocation/free functions of speech tools (default=no)],,enable_festival=no) 53 | AC_MSG_CHECKING(whether to use memory allocation/free functions of speech tools) 54 | if test x$enable_festival = xyes; then 55 | AC_MSG_RESULT(yes) 56 | AC_DEFINE(FESTIVAL) 57 | else 58 | AC_MSG_RESULT(no) 59 | fi 60 | 61 | 62 | AC_CANONICAL_HOST 63 | AC_C_BIGENDIAN 64 | 65 | 66 | # Checks library for windows audio devices 67 | case "$host_os" in 68 | *win32* | *wince* | *cygwin* | *mingw* ) 69 | AC_HAVE_LIBRARY([winmm],,AC_MSG_ERROR(No winmm)) 70 | ;; 71 | *) 72 | ;; 73 | esac 74 | 75 | 76 | AC_CONFIG_FILES([Makefile bin/Makefile lib/Makefile]) 77 | 78 | AC_OUTPUT 79 | 80 | -------------------------------------------------------------------------------- /src/hts_engine_API.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=@CMAKE_INSTALL_PREFIX@ 3 | libdir=${prefix}/lib 4 | includedir=${prefix}/include 5 | 6 | Name: @CMAKE_PROJECT_NAME@ 7 | Description: hts_engine API - a library to synthesize speech waveform from HMMs trained by the HMM-based speech synthesis system (HTS). 
8 | Version: @CMAKE_PROJECT_VERSION@ 9 | Cflags: -I${includedir} 10 | Libs: -L${libdir} -l@CMAKE_PROJECT_NAME@ 11 | -------------------------------------------------------------------------------- /src/include/HTS_engine.h: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. 
*/ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_ENGINE_H 46 | #define HTS_ENGINE_H 47 | 48 | #ifdef __cplusplus 49 | #define HTS_ENGINE_H_START extern "C" { 50 | #define HTS_ENGINE_H_END } 51 | #else 52 | #define HTS_ENGINE_H_START 53 | #define HTS_ENGINE_H_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_ENGINE_H_START; 57 | 58 | #include 59 | 60 | #ifndef HTS_ENGINE_DLLEXPORT_H_ 61 | #define HTS_ENGINE_DLLEXPORT_H_ 62 | 63 | #ifndef DLLEXPORT 64 | # ifdef _WIN32 65 | # define DLLEXPORT __declspec(dllexport) 66 | # else 67 | # define DLLEXPORT 68 | # endif 69 | #endif 70 | #endif 71 | 72 | /* common ---------------------------------------------------------- */ 73 | 74 | typedef char HTS_Boolean; 75 | 76 | #ifndef TRUE 77 | #define TRUE 1 78 | #endif /* !TRUE */ 79 | 80 | #ifndef FALSE 81 | #define FALSE 0 82 | #endif /* !FALSE */ 83 | 84 | #ifndef HTS_NODATA 85 | #define HTS_NODATA (-1.0e+10) 86 | #endif /* HTS_NODATA */ 87 | 88 | /* copyright ------------------------------------------------------- */ 89 | 90 | #define 
HTS_COPYRIGHT "The HMM-Based Speech Synthesis Engine \"hts_engine API\"\nVersion 1.09 (http://hts-engine.sourceforge.net/)\nCopyright (C) 2001-2014 Nagoya Institute of Technology\n 2001-2008 Tokyo Institute of Technology\nAll rights reserved.\n" 91 | 92 | /* audio ----------------------------------------------------------- */ 93 | 94 | /* HTS_Audio: audio output wrapper */ 95 | typedef struct _HTS_Audio { 96 | size_t sampling_frequency; /* sampling frequency */ 97 | size_t max_buff_size; /* buffer size for audio output interface */ 98 | short *buff; /* current buffer */ 99 | size_t buff_size; /* current buffer size */ 100 | void *audio_interface; /* audio interface specified in compile step */ 101 | } HTS_Audio; 102 | 103 | /* model ----------------------------------------------------------- */ 104 | 105 | /* HTS_Window: window coefficients to calculate dynamic features. */ 106 | typedef struct _HTS_Window { 107 | size_t size; /* # of windows (static + deltas) */ 108 | int *l_width; /* left width of windows */ 109 | int *r_width; /* right width of windows */ 110 | double **coefficient; /* window coefficient */ 111 | size_t max_width; /* maximum width of windows */ 112 | } HTS_Window; 113 | 114 | /* HTS_Pattern: list of patterns in a question and a tree. */ 115 | typedef struct _HTS_Pattern { 116 | char *string; /* pattern string */ 117 | struct _HTS_Pattern *next; /* pointer to the next pattern */ 118 | } HTS_Pattern; 119 | 120 | /* HTS_Question: list of questions in a tree. */ 121 | typedef struct _HTS_Question { 122 | char *string; /* name of this question */ 123 | HTS_Pattern *head; /* pointer to the head of pattern list */ 124 | struct _HTS_Question *next; /* pointer to the next question */ 125 | } HTS_Question; 126 | 127 | /* HTS_Node: list of tree nodes in a tree. 
*/ 128 | typedef struct _HTS_Node { 129 | int index; /* index of this node */ 130 | size_t pdf; /* index of PDF for this node (leaf node only) */ 131 | struct _HTS_Node *yes; /* pointer to its child node (yes) */ 132 | struct _HTS_Node *no; /* pointer to its child node (no) */ 133 | struct _HTS_Node *next; /* pointer to the next node */ 134 | HTS_Question *quest; /* question applied at this node */ 135 | } HTS_Node; 136 | 137 | /* HTS_Tree: list of decision trees in a model. */ 138 | typedef struct _HTS_Tree { 139 | HTS_Pattern *head; /* pointer to the head of pattern list for this tree */ 140 | struct _HTS_Tree *next; /* pointer to next tree */ 141 | HTS_Node *root; /* root node of this tree */ 142 | size_t state; /* state index of this tree */ 143 | } HTS_Tree; 144 | 145 | /* HTS_Model: set of PDFs, decision trees and questions. */ 146 | typedef struct _HTS_Model { 147 | size_t vector_length; /* vector length (static features only) */ 148 | size_t num_windows; /* # of windows for delta */ 149 | HTS_Boolean is_msd; /* flag for MSD */ 150 | size_t ntree; /* # of trees */ 151 | size_t *npdf; /* # of PDFs at each tree */ 152 | float ***pdf; /* PDFs */ 153 | HTS_Tree *tree; /* pointer to the list of trees */ 154 | HTS_Question *question; /* pointer to the list of questions */ 155 | } HTS_Model; 156 | 157 | /* HTS_ModelSet: set of duration models, HMMs and GV models. 
*/ 158 | typedef struct _HTS_ModelSet { 159 | char *hts_voice_version; /* version of HTS voice format */ 160 | size_t sampling_frequency; /* sampling frequency */ 161 | size_t frame_period; /* frame period */ 162 | size_t num_voices; /* # of HTS voices */ 163 | size_t num_states; /* # of HMM states */ 164 | size_t num_streams; /* # of streams */ 165 | char *stream_type; /* stream type */ 166 | char *fullcontext_format; /* fullcontext label format */ 167 | char *fullcontext_version; /* version of fullcontext label */ 168 | HTS_Question *gv_off_context; /* GV switch */ 169 | char **option; /* options for each stream */ 170 | HTS_Model *duration; /* duration PDFs and trees */ 171 | HTS_Window *window; /* window coefficients for delta */ 172 | HTS_Model **stream; /* parameter PDFs and trees */ 173 | HTS_Model **gv; /* GV PDFs and trees */ 174 | } HTS_ModelSet; 175 | 176 | /* label ----------------------------------------------------------- */ 177 | 178 | /* HTS_LabelString: individual label string with time information */ 179 | typedef struct _HTS_LabelString { 180 | struct _HTS_LabelString *next; /* pointer to next label string */ 181 | char *name; /* label string */ 182 | double start; /* start frame specified in the given label */ 183 | double end; /* end frame specified in the given label */ 184 | } HTS_LabelString; 185 | 186 | /* HTS_Label: list of label strings */ 187 | typedef struct _HTS_Label { 188 | HTS_LabelString *head; /* pointer to the head of label string */ 189 | size_t size; /* # of label strings */ 190 | } HTS_Label; 191 | 192 | /* sstream --------------------------------------------------------- */ 193 | 194 | /* HTS_SStream: individual state stream */ 195 | typedef struct _HTS_SStream { 196 | size_t vector_length; /* vector length (static features only) */ 197 | double **mean; /* mean vector sequence */ 198 | double **vari; /* variance vector sequence */ 199 | double *msd; /* MSD parameter sequence */ 200 | size_t win_size; /* # of windows (static + 
deltas) */ 201 | int *win_l_width; /* left width of windows */ 202 | int *win_r_width; /* right width of windows */ 203 | double **win_coefficient; /* window coefficients */ 204 | size_t win_max_width; /* maximum width of windows */ 205 | double *gv_mean; /* mean vector of GV */ 206 | double *gv_vari; /* variance vector of GV */ 207 | HTS_Boolean *gv_switch; /* GV flag sequence */ 208 | } HTS_SStream; 209 | 210 | /* HTS_SStreamSet: set of state stream */ 211 | typedef struct _HTS_SStreamSet { 212 | HTS_SStream *sstream; /* state streams */ 213 | size_t nstream; /* # of streams */ 214 | size_t nstate; /* # of states */ 215 | size_t *duration; /* duration sequence */ 216 | size_t total_state; /* total state */ 217 | size_t total_frame; /* total frame */ 218 | } HTS_SStreamSet; 219 | 220 | /* pstream --------------------------------------------------------- */ 221 | 222 | /* HTS_SMatrices: matrices/vectors used in the speech parameter generation algorithm. */ 223 | typedef struct _HTS_SMatrices { 224 | double **mean; /* mean vector sequence */ 225 | double **ivar; /* inverse diag variance sequence */ 226 | double *g; /* vector used in the forward substitution */ 227 | double **wuw; /* W' U^-1 W */ 228 | double *wum; /* W' U^-1 mu */ 229 | } HTS_SMatrices; 230 | 231 | /* HTS_PStream: individual PDF stream.
*/ 232 | typedef struct _HTS_PStream { 233 | size_t vector_length; /* vector length (static features only) */ 234 | size_t length; /* stream length */ 235 | size_t width; /* width of dynamic window */ 236 | double **par; /* output parameter vector */ 237 | HTS_SMatrices sm; /* matrices for parameter generation */ 238 | size_t win_size; /* # of windows (static + deltas) */ 239 | int *win_l_width; /* left width of windows */ 240 | int *win_r_width; /* right width of windows */ 241 | double **win_coefficient; /* window coefficients */ 242 | HTS_Boolean *msd_flag; /* Boolean sequence for MSD */ 243 | double *gv_mean; /* mean vector of GV */ 244 | double *gv_vari; /* variance vector of GV */ 245 | HTS_Boolean *gv_switch; /* GV flag sequence */ 246 | size_t gv_length; /* frame length for GV calculation */ 247 | } HTS_PStream; 248 | 249 | /* HTS_PStreamSet: set of PDF streams. */ 250 | typedef struct _HTS_PStreamSet { 251 | HTS_PStream *pstream; /* PDF streams */ 252 | size_t nstream; /* # of PDF streams */ 253 | size_t total_frame; /* total frame */ 254 | } HTS_PStreamSet; 255 | 256 | /* gstream --------------------------------------------------------- */ 257 | 258 | /* HTS_GStream: generated parameter stream. */ 259 | typedef struct _HTS_GStream { 260 | size_t vector_length; /* vector length (static features only) */ 261 | double **par; /* generated parameter */ 262 | } HTS_GStream; 263 | 264 | /* HTS_GStreamSet: set of generated parameter stream. 
*/ 265 | typedef struct _HTS_GStreamSet { 266 | size_t total_nsample; /* total sample */ 267 | size_t total_frame; /* total frame */ 268 | size_t nstream; /* # of streams */ 269 | HTS_GStream *gstream; /* generated parameter streams */ 270 | double *gspeech; /* generated speech */ 271 | } HTS_GStreamSet; 272 | 273 | /* engine ---------------------------------------------------------- */ 274 | 275 | /* HTS_Condition: synthesis condition */ 276 | typedef struct _HTS_Condition { 277 | /* global */ 278 | size_t sampling_frequency; /* sampling frequency */ 279 | size_t fperiod; /* frame period */ 280 | size_t audio_buff_size; /* audio buffer size (for audio device) */ 281 | HTS_Boolean stop; /* stop flag */ 282 | double volume; /* volume */ 283 | double *msd_threshold; /* MSD thresholds */ 284 | double *gv_weight; /* GV weights */ 285 | 286 | /* duration */ 287 | HTS_Boolean phoneme_alignment_flag; /* flag for using phoneme alignment in label */ 288 | double speed; /* speech speed */ 289 | 290 | /* spectrum */ 291 | size_t stage; /* if stage=0 then gamma=0 else gamma=-1/stage */ 292 | HTS_Boolean use_log_gain; /* log gain flag (for LSP) */ 293 | double alpha; /* all-pass constant */ 294 | double beta; /* postfiltering coefficient */ 295 | 296 | /* log F0 */ 297 | double additional_half_tone; /* additional half tone */ 298 | 299 | /* interpolation weights */ 300 | double *duration_iw; /* weights for duration interpolation */ 301 | double **parameter_iw; /* weights for parameter interpolation */ 302 | double **gv_iw; /* weights for GV interpolation */ 303 | } HTS_Condition; 304 | 305 | /* HTS_Engine: Engine itself. 
*/ 306 | typedef struct _HTS_Engine { 307 | HTS_Condition condition; /* synthesis condition */ 308 | HTS_Audio audio; /* audio output */ 309 | HTS_ModelSet ms; /* set of duration models, HMMs and GV models */ 310 | HTS_Label label; /* label */ 311 | HTS_SStreamSet sss; /* set of state streams */ 312 | HTS_PStreamSet pss; /* set of PDF streams */ 313 | HTS_GStreamSet gss; /* set of generated parameter streams */ 314 | } HTS_Engine; 315 | 316 | /* engine method --------------------------------------------------- */ 317 | 318 | /* HTS_Engine_initialize: initialize engine */ 319 | DLLEXPORT void HTS_Engine_initialize(HTS_Engine * engine); 320 | 321 | /* HTS_Engine_load: load HTS voices */ 322 | DLLEXPORT HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices); 323 | 324 | /* HTS_Engine_set_sampling_frequency: set sampling frequency */ 325 | DLLEXPORT void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i); 326 | 327 | /* HTS_Engine_get_sampling_frequency: get sampling frequency */ 328 | DLLEXPORT size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine); 329 | 330 | /* HTS_Engine_set_fperiod: set frame period */ 331 | DLLEXPORT void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i); 332 | 333 | /* HTS_Engine_get_fperiod: get frame period */ 334 | DLLEXPORT size_t HTS_Engine_get_fperiod(HTS_Engine * engine); 335 | 336 | /* HTS_Engine_set_audio_buff_size: set audio buffer size */ 337 | DLLEXPORT void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i); 338 | 339 | /* HTS_Engine_get_audio_buff_size: get audio buffer size */ 340 | DLLEXPORT size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine); 341 | 342 | /* HTS_Engine_set_stop_flag: set stop flag */ 343 | DLLEXPORT void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b); 344 | 345 | /* HTS_Engine_get_stop_flag: get stop flag */ 346 | DLLEXPORT HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine); 347 | 348 | /* HTS_Engine_set_volume: set
volume in db */ 349 | DLLEXPORT void HTS_Engine_set_volume(HTS_Engine * engine, double f); 350 | 351 | /* HTS_Engine_get_volume: get volume in db */ 352 | DLLEXPORT double HTS_Engine_get_volume(HTS_Engine * engine); 353 | 354 | /* HTS_Engine_set_msd_threshold: set MSD threshold */ 355 | DLLEXPORT void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f); 356 | 357 | /* HTS_Engine_get_msd_threshold: get MSD threshold */ 358 | DLLEXPORT double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index); 359 | 360 | /* HTS_Engine_set_gv_weight: set GV weight */ 361 | DLLEXPORT void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f); 362 | 363 | /* HTS_Engine_get_gv_weight: get GV weight */ 364 | DLLEXPORT double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index); 365 | 366 | /* HTS_Engine_set_speed: set speech speed */ 367 | DLLEXPORT void HTS_Engine_set_speed(HTS_Engine * engine, double f); 368 | 369 | /* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */ 370 | DLLEXPORT void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b); 371 | 372 | /* HTS_Engine_set_alpha: set alpha */ 373 | DLLEXPORT void HTS_Engine_set_alpha(HTS_Engine * engine, double f); 374 | 375 | /* HTS_Engine_get_alpha: get alpha */ 376 | DLLEXPORT double HTS_Engine_get_alpha(HTS_Engine * engine); 377 | 378 | /* HTS_Engine_set_beta: set beta */ 379 | DLLEXPORT void HTS_Engine_set_beta(HTS_Engine * engine, double f); 380 | 381 | /* HTS_Engine_get_beta: get beta */ 382 | DLLEXPORT double HTS_Engine_get_beta(HTS_Engine * engine); 383 | 384 | /* HTS_Engine_add_half_tone: add half tone */ 385 | DLLEXPORT void HTS_Engine_add_half_tone(HTS_Engine * engine, double f); 386 | 387 | /* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */ 388 | DLLEXPORT void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t
voice_index, double f); 389 | 390 | /* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */ 391 | DLLEXPORT double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index); 392 | 393 | /* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */ 394 | DLLEXPORT void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f); 395 | 396 | /* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */ 397 | DLLEXPORT double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index); 398 | 399 | /* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */ 400 | DLLEXPORT void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f); 401 | 402 | /* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */ 403 | DLLEXPORT double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index); 404 | 405 | /* HTS_Engine_get_total_state: get total number of state */ 406 | DLLEXPORT size_t HTS_Engine_get_total_state(HTS_Engine * engine); 407 | 408 | /* HTS_Engine_set_state_mean: set mean value of state */ 409 | DLLEXPORT void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f); 410 | 411 | /* HTS_Engine_get_state_mean: get mean value of state */ 412 | DLLEXPORT double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index); 413 | 414 | /* HTS_Engine_get_state_duration: get state duration */ 415 | DLLEXPORT size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index); 416 | 417 | /* HTS_Engine_get_nvoices: get number of voices */ 418 | DLLEXPORT size_t HTS_Engine_get_nvoices(HTS_Engine * 
engine); 419 | 420 | /* HTS_Engine_get_nstream: get number of stream */ 421 | DLLEXPORT size_t HTS_Engine_get_nstream(HTS_Engine * engine); 422 | 423 | /* HTS_Engine_get_nstate: get number of state */ 424 | DLLEXPORT size_t HTS_Engine_get_nstate(HTS_Engine * engine); 425 | 426 | /* HTS_Engine_get_fullcontext_label_format: get full context label format */ 427 | DLLEXPORT const char *HTS_Engine_get_fullcontext_label_format(HTS_Engine * engine); 428 | 429 | /* HTS_Engine_get_fullcontext_label_version: get full context label version */ 430 | DLLEXPORT const char *HTS_Engine_get_fullcontext_label_version(HTS_Engine * engine); 431 | 432 | /* HTS_Engine_get_total_frame: get total number of frame */ 433 | DLLEXPORT size_t HTS_Engine_get_total_frame(HTS_Engine * engine); 434 | 435 | /* HTS_Engine_get_nsamples: get number of samples */ 436 | DLLEXPORT size_t HTS_Engine_get_nsamples(HTS_Engine * engine); 437 | 438 | /* HTS_Engine_get_generated_parameter: output generated parameter */ 439 | DLLEXPORT double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index); 440 | 441 | /* HTS_Engine_get_generated_speech: output generated speech */ 442 | DLLEXPORT double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index); 443 | 444 | /* HTS_Engine_synthesize_from_fn: synthesize speech from file name */ 445 | DLLEXPORT HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn); 446 | 447 | /* HTS_Engine_synthesize_from_strings: synthesize speech from string list */ 448 | DLLEXPORT HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines); 449 | 450 | /* HTS_Engine_generate_state_sequence_from_fn: generate state sequence from file name (1st synthesis step) */ 451 | DLLEXPORT HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn); 452 | 453 | /* HTS_Engine_generate_state_sequence_from_strings: generate state sequence 
from string list (1st synthesis step) */ 454 | DLLEXPORT HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines); 455 | 456 | /* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */ 457 | DLLEXPORT HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine); 458 | 459 | /* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */ 460 | DLLEXPORT HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine); 461 | 462 | /* HTS_Engine_save_information: save trace information */ 463 | DLLEXPORT void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp); 464 | 465 | /* HTS_Engine_save_label: save label with time */ 466 | DLLEXPORT void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp); 467 | 468 | /* HTS_Engine_save_generated_parameter: save generated parameter */ 469 | DLLEXPORT void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp); 470 | 471 | /* HTS_Engine_save_generated_speech: save generated speech */ 472 | DLLEXPORT void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp); 473 | 474 | /* HTS_Engine_save_riff: save RIFF format file */ 475 | DLLEXPORT void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp); 476 | 477 | /* HTS_Engine_refresh: free memory per one time synthesis */ 478 | DLLEXPORT void HTS_Engine_refresh(HTS_Engine * engine); 479 | 480 | /* HTS_Engine_clear: free engine */ 481 | DLLEXPORT void HTS_Engine_clear(HTS_Engine * engine); 482 | 483 | HTS_ENGINE_H_END; 484 | 485 | #endif /* !HTS_ENGINE_H */ 486 | -------------------------------------------------------------------------------- /src/lib/.cvsignore: -------------------------------------------------------------------------------- 1 | Makefile 2 | Makefile.in 3 | libHTSEngine.a 4 | .deps 5 | -------------------------------------------------------------------------------- /src/lib/HTS_audio.c: 
-------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_AUDIO_C 46 | #define HTS_AUDIO_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_AUDIO_C_START extern "C" { 50 | #define HTS_AUDIO_C_END } 51 | #else 52 | #define HTS_AUDIO_C_START 53 | #define HTS_AUDIO_C_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_AUDIO_C_START; 57 | 58 | #if !defined(AUDIO_PLAY_WIN32) && !defined(AUDIO_PLAY_PORTAUDIO) && !defined(AUDIO_PLAY_NONE) 59 | #if defined(__WINCE__) || defined(_WINCE) || defined(_WINCE) || defined(__WINCE) || defined(__WIN32__) || defined(__WIN32) || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) 60 | #define AUDIO_PLAY_WIN32 61 | #else 62 | #define AUDIO_PLAY_NONE 63 | #endif /* __WINCE__ || _WINCE || _WINCE || __WINCE || __WIN32__ || __WIN32 || _WIN32 || WIN32 || __CYGWIN__ || __MINGW32__ */ 64 | #endif /* !AUDIO_PLAY_WIN32 && !AUDIO_PLAY_PORTAUDIO && !AUDIO_PLAY_NONE */ 65 | 66 | /* hts_engine libralies */ 67 | #include "HTS_hidden.h" 68 | 69 | #ifdef AUDIO_PLAY_WIN32 70 | 71 | #include 72 | #include 73 | #define AUDIO_WAIT_BUFF_MS 10 /* wait time (0.01 sec) */ 74 | #define AUDIO_CHANNEL 1 /* monaural */ 75 | #ifdef _M_X64 76 | #define AUDIO_POINTER_TYPE DWORD_PTR 77 | #else 78 | #define AUDIO_POINTER_TYPE DWORD 79 | #endif 80 | 81 | /* HTS_Audio: audio interface for Windows */ 82 | typedef 
struct _HTS_AudioInterface { 83 | HWAVEOUT hwaveout; /* audio device handle */ 84 | WAVEFORMATEX waveformatex; /* wave formatex */ 85 | unsigned char which_buff; /* double buffering flag */ 86 | HTS_Boolean now_buff_1; /* double buffering flag */ 87 | HTS_Boolean now_buff_2; /* double buffering flag */ 88 | WAVEHDR buff_1; /* buffer */ 89 | WAVEHDR buff_2; /* buffer */ 90 | } HTS_AudioInterface; 91 | 92 | /* HTS_AudioInterface_callback_function: callback function from audio device */ 93 | static void CALLBACK HTS_AudioInterface_callback_function(HWAVEOUT hwaveout, UINT msg, AUDIO_POINTER_TYPE user_data, AUDIO_POINTER_TYPE param1, AUDIO_POINTER_TYPE param2) 94 | { 95 | WAVEHDR *wavehdr = (WAVEHDR *) param1; 96 | HTS_AudioInterface *audio_interface = (HTS_AudioInterface *) user_data; 97 | 98 | if (msg == MM_WOM_DONE && wavehdr && (wavehdr->dwFlags & WHDR_DONE)) { 99 | if (audio_interface->now_buff_1 == TRUE && wavehdr == &(audio_interface->buff_1)) { 100 | audio_interface->now_buff_1 = FALSE; 101 | } else if (audio_interface->now_buff_2 == TRUE && wavehdr == &(audio_interface->buff_2)) { 102 | audio_interface->now_buff_2 = FALSE; 103 | } 104 | } 105 | } 106 | 107 | /* HTS_AudioInterface_write: send buffer to audio device */ 108 | static HTS_Boolean HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size) 109 | { 110 | MMRESULT result; 111 | 112 | if (audio_interface->which_buff == 1) { 113 | while (audio_interface->now_buff_1 == TRUE) 114 | Sleep(AUDIO_WAIT_BUFF_MS); 115 | audio_interface->now_buff_1 = TRUE; 116 | audio_interface->which_buff = 2; 117 | memcpy(audio_interface->buff_1.lpData, buff, buff_size * sizeof(short)); 118 | audio_interface->buff_1.dwBufferLength = (DWORD) buff_size *sizeof(short); 119 | result = waveOutWrite(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR)); 120 | } else { 121 | while (audio_interface->now_buff_2 == TRUE) 122 | Sleep(AUDIO_WAIT_BUFF_MS); 123 | 
audio_interface->now_buff_2 = TRUE; 124 | audio_interface->which_buff = 1; 125 | memcpy(audio_interface->buff_2.lpData, buff, buff_size * sizeof(short)); 126 | audio_interface->buff_2.dwBufferLength = (DWORD) buff_size *sizeof(short); 127 | result = waveOutWrite(audio_interface->hwaveout, &(audio_interface->buff_2), sizeof(WAVEHDR)); 128 | } 129 | 130 | if (result != MMSYSERR_NOERROR) 131 | HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n"); 132 | 133 | return (result == MMSYSERR_NOERROR) ? TRUE : FALSE; 134 | } 135 | 136 | /* HTS_AudioInterface_close: close audio device */ 137 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface) 138 | { 139 | MMRESULT result; 140 | 141 | /* stop audio */ 142 | result = waveOutReset(audio_interface->hwaveout); 143 | if (result != MMSYSERR_NOERROR) 144 | HTS_error(0, "hts_engine: Cannot stop and reset your output audio device.\n"); 145 | /* unprepare */ 146 | result = waveOutUnprepareHeader(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR)); 147 | if (result != MMSYSERR_NOERROR) 148 | HTS_error(0, "hts_engine: Cannot cleanup the audio datablocks to play waveform.\n"); 149 | result = waveOutUnprepareHeader(audio_interface->hwaveout, &(audio_interface->buff_2), sizeof(WAVEHDR)); 150 | if (result != MMSYSERR_NOERROR) 151 | HTS_error(0, "hts_engine: Cannot cleanup the audio datablocks to play waveform.\n"); 152 | /* close */ 153 | result = waveOutClose(audio_interface->hwaveout); 154 | if (result != MMSYSERR_NOERROR) 155 | HTS_error(0, "hts_engine: Failed to close your output audio device.\n"); 156 | if (audio_interface->buff_1.lpData != NULL) 157 | HTS_free(audio_interface->buff_1.lpData); 158 | if (audio_interface->buff_2.lpData != NULL) 159 | HTS_free(audio_interface->buff_2.lpData); 160 | 161 | HTS_free(audio_interface); 162 | } 163 | 164 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size) 
165 | { 166 | HTS_AudioInterface *audio_interface; 167 | MMRESULT result; 168 | 169 | /* make audio interface */ 170 | audio_interface = (HTS_AudioInterface *) HTS_calloc(1, sizeof(HTS_AudioInterface)); 171 | 172 | audio_interface->hwaveout = 0; 173 | audio_interface->which_buff = 1; 174 | audio_interface->now_buff_1 = FALSE; 175 | audio_interface->now_buff_2 = FALSE; 176 | 177 | /* format */ 178 | audio_interface->waveformatex.wFormatTag = WAVE_FORMAT_PCM; 179 | audio_interface->waveformatex.nChannels = AUDIO_CHANNEL; 180 | audio_interface->waveformatex.nSamplesPerSec = (DWORD) sampling_frequency; 181 | audio_interface->waveformatex.wBitsPerSample = sizeof(short) * 8; 182 | audio_interface->waveformatex.nBlockAlign = AUDIO_CHANNEL * audio_interface->waveformatex.wBitsPerSample / 8; 183 | audio_interface->waveformatex.nAvgBytesPerSec = (DWORD) sampling_frequency *audio_interface->waveformatex.nBlockAlign; 184 | /* open */ 185 | result = waveOutOpen(&audio_interface->hwaveout, WAVE_MAPPER, &audio_interface->waveformatex, (AUDIO_POINTER_TYPE) HTS_AudioInterface_callback_function, (AUDIO_POINTER_TYPE) audio_interface, CALLBACK_FUNCTION); 186 | if (result != MMSYSERR_NOERROR) { 187 | HTS_error(0, "hts_engine: Failed to open your output audio_interface device to play waveform.\n"); 188 | HTS_free(audio_interface); 189 | return NULL; 190 | } 191 | 192 | /* prepare */ 193 | audio_interface->buff_1.lpData = (LPSTR) HTS_calloc(max_buff_size, sizeof(short)); 194 | audio_interface->buff_1.dwBufferLength = (DWORD) max_buff_size *sizeof(short); 195 | audio_interface->buff_1.dwFlags = WHDR_BEGINLOOP | WHDR_ENDLOOP; 196 | audio_interface->buff_1.dwLoops = 1; 197 | audio_interface->buff_1.lpNext = 0; 198 | audio_interface->buff_1.reserved = 0; 199 | result = waveOutPrepareHeader(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR)); 200 | if (result != MMSYSERR_NOERROR) { 201 | HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play 
waveform.\n"); 202 | HTS_free(audio_interface->buff_1.lpData); 203 | HTS_free(audio_interface); 204 | return NULL; 205 | } 206 | audio_interface->buff_2.lpData = (LPSTR) HTS_calloc(max_buff_size, sizeof(short)); 207 | audio_interface->buff_2.dwBufferLength = (DWORD) max_buff_size *sizeof(short); 208 | audio_interface->buff_2.dwFlags = WHDR_BEGINLOOP | WHDR_ENDLOOP; 209 | audio_interface->buff_2.dwLoops = 1; 210 | audio_interface->buff_2.lpNext = 0; 211 | audio_interface->buff_2.reserved = 0; 212 | result = waveOutPrepareHeader(audio_interface->hwaveout, &(audio_interface->buff_2), sizeof(WAVEHDR)); 213 | if (result != MMSYSERR_NOERROR) { 214 | HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play waveform.\n"); 215 | HTS_free(audio_interface->buff_1.lpData); 216 | HTS_free(audio_interface->buff_2.lpData); 217 | HTS_free(audio_interface); 218 | return NULL; 219 | } 220 | 221 | return audio_interface; 222 | } 223 | 224 | /* HTS_Audio_initialize: initialize audio */ 225 | void HTS_Audio_initialize(HTS_Audio * audio) 226 | { 227 | if (audio == NULL) 228 | return; 229 | 230 | audio->sampling_frequency = 0; 231 | audio->max_buff_size = 0; 232 | audio->buff = NULL; 233 | audio->buff_size = 0; 234 | audio->audio_interface = NULL; 235 | } 236 | 237 | /* HTS_Audio_set_parameter: set parameters for audio */ 238 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size) 239 | { 240 | if (audio == NULL) 241 | return; 242 | 243 | if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size) 244 | return; 245 | 246 | HTS_Audio_clear(audio); 247 | 248 | if (sampling_frequency == 0 || max_buff_size == 0) 249 | return; 250 | 251 | audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size); 252 | if (audio->audio_interface == NULL) 253 | return; 254 | 255 | audio->sampling_frequency = sampling_frequency; 256 | audio->max_buff_size = max_buff_size; 257 | 
audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short)); 258 | audio->buff_size = 0; 259 | } 260 | 261 | /* HTS_Audio_write: send data to audio */ 262 | void HTS_Audio_write(HTS_Audio * audio, short data) 263 | { 264 | if (audio == NULL || audio->audio_interface == NULL) 265 | return; 266 | 267 | audio->buff[audio->buff_size++] = data; 268 | 269 | if (audio->buff_size >= audio->max_buff_size) { 270 | if (HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->buff_size) != TRUE) { 271 | HTS_Audio_clear(audio); 272 | return; 273 | } 274 | audio->buff_size = 0; 275 | } 276 | } 277 | 278 | /* HTS_Audio_flush: flush remain data */ 279 | void HTS_Audio_flush(HTS_Audio * audio) 280 | { 281 | HTS_AudioInterface *audio_interface; 282 | 283 | if (audio == NULL || audio->audio_interface == NULL) 284 | return; 285 | 286 | audio_interface = (HTS_AudioInterface *) audio->audio_interface; 287 | if (audio->buff_size > 0) { 288 | if (HTS_AudioInterface_write(audio_interface, audio->buff, audio->buff_size) != TRUE) { 289 | HTS_Audio_clear(audio); 290 | return; 291 | } 292 | audio->buff_size = 0; 293 | } 294 | while (audio_interface->now_buff_1 == TRUE || audio_interface->now_buff_2 == TRUE) 295 | Sleep(AUDIO_WAIT_BUFF_MS); 296 | } 297 | 298 | /* HTS_Audio_clear: free audio */ 299 | void HTS_Audio_clear(HTS_Audio * audio) 300 | { 301 | HTS_AudioInterface *audio_interface; 302 | 303 | if (audio == NULL || audio->audio_interface == NULL) 304 | return; 305 | 306 | audio_interface = (HTS_AudioInterface *) audio->audio_interface; 307 | HTS_AudioInterface_close(audio_interface); 308 | if (audio->buff != NULL) 309 | free(audio->buff); 310 | HTS_Audio_initialize(audio); 311 | } 312 | 313 | #endif /* AUDIO_PLAY_WIN32 */ 314 | 315 | #ifdef AUDIO_PLAY_PORTAUDIO 316 | 317 | #include "portaudio.h" 318 | 319 | /* HTS_AudioInterface: audio output for PortAudio */ 320 | typedef struct _HTS_AudioInterface { 321 | PaStreamParameters parameters; /* 
parameters for output stream */ 322 | PaStream *stream; /* output stream */ 323 | } HTS_AudioInterface; 324 | 325 | /* HTS_AudioInterface_write: send data to audio device */ 326 | static void HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size) 327 | { 328 | PaError err; 329 | 330 | err = Pa_WriteStream(audio_interface->stream, buff, buff_size); 331 | if (err != paNoError && err != paOutputUnderflowed) 332 | HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n"); 333 | } 334 | 335 | /* HTS_AudioInterface_close: close audio device */ 336 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface) 337 | { 338 | PaError err; 339 | 340 | err = Pa_StopStream(audio_interface->stream); 341 | if (err != paNoError) 342 | HTS_error(0, "hts_engine: Cannot stop your output audio device.\n"); 343 | err = Pa_CloseStream(audio_interface->stream); 344 | if (err != paNoError) 345 | HTS_error(0, "hts_engine: Failed to close your output audio device.\n"); 346 | Pa_Terminate(); 347 | 348 | HTS_free(audio_interface); 349 | } 350 | 351 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size) 352 | { 353 | HTS_AudioInterface *audio_interface; 354 | PaError err; 355 | 356 | audio_interface = HTS_calloc(1, sizeof(HTS_AudioInterface)); 357 | audio_interface->stream = NULL; 358 | 359 | err = Pa_Initialize(); 360 | if (err != paNoError) { 361 | HTS_error(0, "hts_engine: Failed to initialize your output audio device to play waveform.\n"); 362 | HTS_free(audio_interface); 363 | return NULL; 364 | } 365 | 366 | audio_interface->parameters.device = Pa_GetDefaultOutputDevice(); 367 | audio_interface->parameters.channelCount = 1; 368 | audio_interface->parameters.sampleFormat = paInt16; 369 | audio_interface->parameters.suggestedLatency = Pa_GetDeviceInfo(audio_interface->parameters.device)->defaultLowOutputLatency; 370 | 
audio_interface->parameters.hostApiSpecificStreamInfo = NULL; 371 | 372 | err = Pa_OpenStream(&audio_interface->stream, NULL, &audio_interface->parameters, sampling_frequency, max_buff_size, paClipOff, NULL, NULL); 373 | if (err != paNoError) { 374 | HTS_error(0, "hts_engine: Failed to open your output audio device to play waveform.\n"); 375 | Pa_Terminate(); 376 | HTS_free(audio_interface); 377 | return NULL; 378 | } 379 | 380 | err = Pa_StartStream(audio_interface->stream); 381 | if (err != paNoError) { 382 | HTS_error(0, "hts_engine: Failed to start your output audio device to play waveform.\n"); 383 | Pa_CloseStream(audio_interface->stream); 384 | Pa_Terminate(); 385 | HTS_free(audio_interface); 386 | return NULL; 387 | } 388 | 389 | return audio_interface; 390 | } 391 | 392 | /* HTS_Audio_initialize: initialize audio */ 393 | void HTS_Audio_initialize(HTS_Audio * audio) 394 | { 395 | if (audio == NULL) 396 | return; 397 | 398 | audio->sampling_frequency = 0; 399 | audio->max_buff_size = 0; 400 | audio->buff = NULL; 401 | audio->buff_size = 0; 402 | audio->audio_interface = NULL; 403 | } 404 | 405 | /* HTS_Audio_set_parameter: set parameters for audio */ 406 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size) 407 | { 408 | if (audio == NULL) 409 | return; 410 | 411 | if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size) 412 | return; 413 | 414 | HTS_Audio_clear(audio); 415 | 416 | if (sampling_frequency == 0 || max_buff_size == 0) 417 | return; 418 | 419 | audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size); 420 | if (audio->audio_interface == NULL) 421 | return; 422 | 423 | audio->sampling_frequency = sampling_frequency; 424 | audio->max_buff_size = max_buff_size; 425 | audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short)); 426 | audio->buff_size = 0; 427 | } 428 | 429 | /* HTS_Audio_write: send data to audio device */ 430 | void 
HTS_Audio_write(HTS_Audio * audio, short data) 431 | { 432 | if (audio == NULL) 433 | return; 434 | 435 | audio->buff[audio->buff_size++] = data; 436 | 437 | if (audio->buff_size >= audio->max_buff_size) { 438 | if (audio->audio_interface != NULL) 439 | HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->max_buff_size); 440 | audio->buff_size = 0; 441 | } 442 | } 443 | 444 | /* HTS_Audio_flush: flush remain data */ 445 | void HTS_Audio_flush(HTS_Audio * audio) 446 | { 447 | HTS_AudioInterface *audio_interface; 448 | 449 | if (audio == NULL || audio->audio_interface == NULL) 450 | return; 451 | 452 | audio_interface = (HTS_AudioInterface *) audio->audio_interface; 453 | if (audio->buff_size > 0) { 454 | HTS_AudioInterface_write(audio_interface, audio->buff, audio->buff_size); 455 | audio->buff_size = 0; 456 | } 457 | } 458 | 459 | /* HTS_Audio_clear: free audio */ 460 | void HTS_Audio_clear(HTS_Audio * audio) 461 | { 462 | HTS_AudioInterface *audio_interface; 463 | 464 | if (audio == NULL || audio->audio_interface == NULL) 465 | return; 466 | audio_interface = (HTS_AudioInterface *) audio->audio_interface; 467 | 468 | HTS_Audio_flush(audio); 469 | HTS_AudioInterface_close(audio_interface); 470 | if (audio->buff != NULL) 471 | HTS_free(audio->buff); 472 | HTS_Audio_initialize(audio); 473 | } 474 | 475 | #endif /* AUDIO_PLAY_PORTAUDIO */ 476 | 477 | #ifdef AUDIO_PLAY_NONE 478 | 479 | /* HTS_Audio_initialize: initialize audio */ 480 | void HTS_Audio_initialize(HTS_Audio * audio) 481 | { 482 | } 483 | 484 | /* HTS_Audio_set_parameter: set parameters for audio */ 485 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequeny, size_t max_buff_size) 486 | { 487 | } 488 | 489 | /* HTS_Audio_write: send data to audio */ 490 | void HTS_Audio_write(HTS_Audio * audio, short data) 491 | { 492 | } 493 | 494 | /* HTS_Audio_flush: flush remain data */ 495 | void HTS_Audio_flush(HTS_Audio * audio) 496 | { 497 | } 498 | 499 | /* 
HTS_Audio_clear: free audio */ 500 | void HTS_Audio_clear(HTS_Audio * audio) 501 | { 502 | } 503 | 504 | #endif /* AUDIO_PLAY_NONE */ 505 | 506 | HTS_AUDIO_C_END; 507 | 508 | #endif /* !HTS_AUDIO_C */ 509 | -------------------------------------------------------------------------------- /src/lib/HTS_gstream.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. 
*/ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_GSTREAM_C 46 | #define HTS_GSTREAM_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_GSTREAM_C_START extern "C" { 50 | #define HTS_GSTREAM_C_END } 51 | #else 52 | #define HTS_GSTREAM_C_START 53 | #define HTS_GSTREAM_C_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_GSTREAM_C_START; 57 | 58 | /* hts_engine libraries */ 59 | #include "HTS_hidden.h" 60 | 61 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */ 62 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss) 63 | { 64 | gss->nstream = 0; 65 | gss->total_frame = 0; 66 | gss->total_nsample = 0; 67 | gss->gstream = NULL; 68 | gss->gspeech = NULL; 69 | } 70 | 71 | /* HTS_GStreamSet_create: generate speech */ 72 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio) 73 | { 74 | size_t i, j, k; 75 | size_t msd_frame; 76 | 
HTS_Vocoder v; 77 | size_t nlpf = 0; 78 | double *lpf = NULL; 79 | 80 | /* check */ 81 | if (gss->gstream || gss->gspeech) { 82 | HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n"); 83 | return FALSE; 84 | } 85 | 86 | /* initialize */ 87 | gss->nstream = HTS_PStreamSet_get_nstream(pss); 88 | gss->total_frame = HTS_PStreamSet_get_total_frame(pss); 89 | gss->total_nsample = fperiod * gss->total_frame; 90 | gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream)); 91 | for (i = 0; i < gss->nstream; i++) { 92 | gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i); 93 | gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *)); 94 | for (j = 0; j < gss->total_frame; j++) 95 | gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double)); 96 | } 97 | gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double)); 98 | 99 | /* copy generated parameter */ 100 | for (i = 0; i < gss->nstream; i++) { 101 | if (HTS_PStreamSet_is_msd(pss, i)) { /* for MSD */ 102 | for (j = 0, msd_frame = 0; j < gss->total_frame; j++) 103 | if (HTS_PStreamSet_get_msd_flag(pss, i, j)) { 104 | for (k = 0; k < gss->gstream[i].vector_length; k++) 105 | gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k); 106 | msd_frame++; 107 | } else 108 | for (k = 0; k < gss->gstream[i].vector_length; k++) 109 | gss->gstream[i].par[j][k] = HTS_NODATA; 110 | } else { /* for non MSD */ 111 | for (j = 0; j < gss->total_frame; j++) 112 | for (k = 0; k < gss->gstream[i].vector_length; k++) 113 | gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k); 114 | } 115 | } 116 | 117 | /* check */ 118 | if (gss->nstream != 2 && gss->nstream != 3) { 119 | HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n"); 120 | HTS_GStreamSet_clear(gss); 121 | return FALSE; 122 | } 123 | if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) { 124 
| HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n"); 125 | HTS_GStreamSet_clear(gss); 126 | return FALSE; 127 | } 128 | if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) { 129 | HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers."); 130 | HTS_GStreamSet_clear(gss); 131 | return FALSE; 132 | } 133 | 134 | /* synthesize speech waveform */ 135 | HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod); 136 | if (gss->nstream >= 3) 137 | nlpf = gss->gstream[2].vector_length; 138 | for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) { 139 | j = i * fperiod; 140 | if (gss->nstream >= 3) 141 | lpf = &gss->gstream[2].par[i][0]; 142 | HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio); 143 | } 144 | HTS_Vocoder_clear(&v); 145 | if (audio) 146 | HTS_Audio_flush(audio); 147 | 148 | return TRUE; 149 | } 150 | 151 | /* HTS_GStreamSet_get_total_nsamples: get total number of sample */ 152 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss) 153 | { 154 | return gss->total_nsample; 155 | } 156 | 157 | /* HTS_GStreamSet_get_total_frame: get total number of frame */ 158 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss) 159 | { 160 | return gss->total_frame; 161 | } 162 | 163 | /* HTS_GStreamSet_get_vector_length: get features length */ 164 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index) 165 | { 166 | return gss->gstream[stream_index].vector_length; 167 | } 168 | 169 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */ 170 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index) 171 | { 172 | return gss->gspeech[sample_index]; 173 | } 174 | 175 | /* HTS_GStreamSet_get_parameter: get generated parameter */ 176 | 
double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index) 177 | { 178 | return gss->gstream[stream_index].par[frame_index][vector_index]; 179 | } 180 | 181 | /* HTS_GStreamSet_clear: free generated parameter stream set */ 182 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss) 183 | { 184 | size_t i, j; 185 | 186 | if (gss->gstream) { 187 | for (i = 0; i < gss->nstream; i++) { 188 | if (gss->gstream[i].par != NULL) { 189 | for (j = 0; j < gss->total_frame; j++) 190 | HTS_free(gss->gstream[i].par[j]); 191 | HTS_free(gss->gstream[i].par); 192 | } 193 | } 194 | HTS_free(gss->gstream); 195 | } 196 | if (gss->gspeech) 197 | HTS_free(gss->gspeech); 198 | HTS_GStreamSet_initialize(gss); 199 | } 200 | 201 | HTS_GSTREAM_C_END; 202 | 203 | #endif /* !HTS_GSTREAM_C */ 204 | -------------------------------------------------------------------------------- /src/lib/HTS_hidden.h: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. 
*/ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_HIDDEN_H 46 | #define HTS_HIDDEN_H 47 | 48 | #ifdef __cplusplus 49 | #define HTS_HIDDEN_H_START extern "C" { 50 | #define HTS_HIDDEN_H_END } 51 | #else 52 | #define HTS_HIDDEN_H_START 53 | #define HTS_HIDDEN_H_END 54 | #endif /* __cplusplus */ 55 | 56 | HTS_HIDDEN_H_START; 57 | 58 | /* hts_engine libraries */ 59 | #include "HTS_engine.h" 60 | 61 | /* common ---------------------------------------------------------- */ 62 | 63 | #define HTS_MAXBUFLEN 1024 64 | 65 | #if !defined(WORDS_BIGENDIAN) && !defined(WORDS_LITTLEENDIAN) 66 | #define WORDS_LITTLEENDIAN 67 | #endif /* !WORDS_BIGENDIAN && !WORDS_LITTLEENDIAN */ 68 | #if defined(WORDS_BIGENDIAN) && defined(WORDS_LITTLEENDIAN) 69 | #undef WORDS_BIGENDIAN 70 | #endif /* WORDS_BIGENDIAN && WORDS_LITTLEENDIAN */ 71 | 72 | #define MAX_F0 20000.0 73 | #define MIN_F0 20.0 74 | #define MAX_LF0 9.9034875525361280454891979401956 /* log(20000.0) */ 75 | #define MIN_LF0 2.9957322735539909934352235761425 /* log(20.0) */ 76 | #define HALF_TONE 0.05776226504666210911810267678818 /* log(2.0) / 12.0 */ 77 | #define DB 0.11512925464970228420089957273422 /* log(10.0) / 20.0 */ 78 | 79 | /* misc ------------------------------------------------------------ */ 80 | 81 | typedef struct _HTS_File { 82 | unsigned char type; 83 | void *pointer; 84 | } HTS_File; 85 | 86 | /* HTS_fopen_from_fn: wrapper for fopen */ 87 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt); 88 | 89 | /* HTS_fopen_from_fp: wrapper for fopen */ 90 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size); 91 | 92 | /* HTS_fopen_from_data: wrapper for fopen */ 93 | HTS_File *HTS_fopen_from_data(void *data, size_t size); 94 | 95 | /* HTS_fclose: wrapper for fclose */ 96 | void HTS_fclose(HTS_File * fp); 97 | 98 | /* HTS_fgetc: wrapper for fgetc */ 99 | int HTS_fgetc(HTS_File * fp); 100 | 101 | /* HTS_feof: wrapper for feof */ 102 | int HTS_feof(HTS_File * fp); 
103 | 104 | /* HTS_fseek: wrapper for fseek */ 105 | int HTS_fseek(HTS_File * fp, long offset, int origin); 106 | 107 | /* HTS_ftell: wrapper for ftell */ 108 | size_t HTS_ftell(HTS_File * fp); 109 | 110 | /* HTS_fread_big_endian: fread with byteswap */ 111 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp); 112 | 113 | /* HTS_fread_little_endian: fread with byteswap */ 114 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp); 115 | 116 | /* HTS_fwrite_little_endian: fwrite with byteswap */ 117 | size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp); 118 | 119 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */ 120 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff); 121 | 122 | /* HTS_get_token_from_fp: get token from file pointer (separators are space,tab,line break) */ 123 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff); 124 | 125 | /* HTS_get_token_from_fp_with_separator: get token from file pointer with specified separator */ 126 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator); 127 | 128 | /* HTS_get_token_from_string: get token from string (separators are space,tab,line break) */ 129 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff); 130 | 131 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */ 132 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator); 133 | 134 | /* HTS_calloc: wrapper for calloc */ 135 | void *HTS_calloc(const size_t num, const size_t size); 136 | 137 | /* HTS_strdup: wrapper for strdup */ 138 | char *HTS_strdup(const char *string); 139 | 140 | /* HTS_alloc_matrix: allocate double matrix */ 141 | double **HTS_alloc_matrix(size_t x, size_t y); 142 | 143 | /* HTS_free_matrix: free double matrix */ 144 | void HTS_free_matrix(double **p, size_t 
x); 145 | 146 | /* HTS_free: wrapper for free */ 147 | void HTS_free(void *p); 148 | 149 | /* HTS_error: output error message */ 150 | void HTS_error(int error, const char *message, ...); 151 | 152 | /* audio ----------------------------------------------------------- */ 153 | 154 | /* HTS_Audio_initialize: initialize audio */ 155 | void HTS_Audio_initialize(HTS_Audio * audio); 156 | 157 | /* HTS_Audio_set_parameter: set parameters for audio */ 158 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size); 159 | 160 | /* HTS_Audio_write: send data to audio */ 161 | void HTS_Audio_write(HTS_Audio * audio, short data); 162 | 163 | /* HTS_Audio_flush: flush remaining data */ 164 | void HTS_Audio_flush(HTS_Audio * audio); 165 | 166 | /* HTS_Audio_clear: free audio */ 167 | void HTS_Audio_clear(HTS_Audio * audio); 168 | 169 | /* model ----------------------------------------------------------- */ 170 | 171 | /* HTS_ModelSet_initialize: initialize model set */ 172 | void HTS_ModelSet_initialize(HTS_ModelSet * ms); 173 | 174 | /* HTS_ModelSet_load: load HTS voices */ 175 | HTS_Boolean HTS_ModelSet_load(HTS_ModelSet * ms, char **voices, size_t num_voices); 176 | 177 | /* HTS_ModelSet_get_sampling_frequency: get sampling frequency of HTS voices */ 178 | size_t HTS_ModelSet_get_sampling_frequency(HTS_ModelSet * ms); 179 | 180 | /* HTS_ModelSet_get_fperiod: get frame period of HTS voices */ 181 | size_t HTS_ModelSet_get_fperiod(HTS_ModelSet * ms); 182 | 183 | /* HTS_ModelSet_get_option: get stream option */ 184 | const char *HTS_ModelSet_get_option(HTS_ModelSet * ms, size_t stream_index); 185 | 186 | /* HTS_ModelSet_get_gv_flag: get GV flag */ 187 | HTS_Boolean HTS_ModelSet_get_gv_flag(HTS_ModelSet * ms, const char *string); 188 | 189 | /* HTS_ModelSet_get_nstate: get number of states */ 190 | size_t HTS_ModelSet_get_nstate(HTS_ModelSet * ms); 191 | 192 | /* HTS_ModelSet_get_fullcontext_label_format: get full-context label format */ 193 | 
const char *HTS_ModelSet_get_fullcontext_label_format(HTS_ModelSet * ms); 194 | 195 | /* HTS_ModelSet_get_fullcontext_label_version: get full-context label version */ 196 | const char *HTS_ModelSet_get_fullcontext_label_version(HTS_ModelSet * ms); 197 | 198 | /* HTS_ModelSet_get_nstream: get number of streams */ 199 | size_t HTS_ModelSet_get_nstream(HTS_ModelSet * ms); 200 | 201 | /* HTS_ModelSet_get_nvoices: get number of HTS voices */ 202 | size_t HTS_ModelSet_get_nvoices(HTS_ModelSet * ms); 203 | 204 | /* HTS_ModelSet_get_vector_length: get vector length */ 205 | size_t HTS_ModelSet_get_vector_length(HTS_ModelSet * ms, size_t stream_index); 206 | 207 | /* HTS_ModelSet_is_msd: get MSD flag */ 208 | HTS_Boolean HTS_ModelSet_is_msd(HTS_ModelSet * ms, size_t stream_index); 209 | 210 | /* HTS_ModelSet_get_window_size: get dynamic window size */ 211 | size_t HTS_ModelSet_get_window_size(HTS_ModelSet * ms, size_t stream_index); 212 | 213 | /* HTS_ModelSet_get_window_left_width: get left width of dynamic window */ 214 | int HTS_ModelSet_get_window_left_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index); 215 | 216 | /* HTS_ModelSet_get_window_right_width: get right width of dynamic window */ 217 | int HTS_ModelSet_get_window_right_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index); 218 | 219 | /* HTS_ModelSet_get_window_coefficient: get coefficient of dynamic window */ 220 | double HTS_ModelSet_get_window_coefficient(HTS_ModelSet * ms, size_t stream_index, size_t window_index, size_t coefficient_index); 221 | 222 | /* HTS_ModelSet_get_window_max_width: get max width of dynamic window */ 223 | size_t HTS_ModelSet_get_window_max_width(HTS_ModelSet * ms, size_t stream_index); 224 | 225 | /* HTS_ModelSet_use_gv: get GV flag */ 226 | HTS_Boolean HTS_ModelSet_use_gv(HTS_ModelSet * ms, size_t stream_index); 227 | 228 | /* HTS_ModelSet_get_duration_index: get index of duration tree and PDF */ 229 | void HTS_ModelSet_get_duration_index(HTS_ModelSet * 
ms, size_t voice_index, const char *string, size_t * tree_index, size_t * pdf_index); 230 | 231 | /* HTS_ModelSet_get_duration: get duration using interpolation weight */ 232 | void HTS_ModelSet_get_duration(HTS_ModelSet * ms, const char *string, const double *iw, double *mean, double *vari); 233 | 234 | /* HTS_ModelSet_get_parameter_index: get index of parameter tree and PDF */ 235 | void HTS_ModelSet_get_parameter_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, size_t state_index, const char *string, size_t * tree_index, size_t * pdf_index); 236 | 237 | /* HTS_ModelSet_get_parameter: get parameter using interpolation weight */ 238 | void HTS_ModelSet_get_parameter(HTS_ModelSet * ms, size_t stream_index, size_t state_index, const char *string, const double *const *iw, double *mean, double *vari, double *msd); 239 | 240 | /* HTS_ModelSet_get_gv_index: get index of GV tree and PDF */ void HTS_ModelSet_get_gv_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, const char *string, size_t * tree_index, size_t * pdf_index); 241 | 242 | /* HTS_ModelSet_get_gv: get GV using interpolation weight */ 243 | void HTS_ModelSet_get_gv(HTS_ModelSet * ms, size_t stream_index, const char *string, const double *const *iw, double *mean, double *vari); 244 | 245 | /* HTS_ModelSet_clear: free model set */ 246 | void HTS_ModelSet_clear(HTS_ModelSet * ms); 247 | 248 | /* label ----------------------------------------------------------- */ 249 | 250 | /* HTS_Label_initialize: initialize label */ 251 | void HTS_Label_initialize(HTS_Label * label); 252 | 253 | /* HTS_Label_load_from_fn: load label from file name */ 254 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn); 255 | 256 | /* HTS_Label_load_from_strings: load label list from string list */ 257 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines); 258 | 259 | /* HTS_Label_get_size: get number of label strings */ 260 | size_t 
HTS_Label_get_size(HTS_Label * label); 261 | 262 | /* HTS_Label_get_string: get label string (NULL if index is out of range) */ 263 | const char *HTS_Label_get_string(HTS_Label * label, size_t index); 264 | 265 | /* HTS_Label_get_start_frame: get start frame (-1.0 if index is out of range) */ 266 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index); 267 | 268 | /* HTS_Label_get_end_frame: get end frame (-1.0 if index is out of range) */ 269 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index); 270 | 271 | /* HTS_Label_clear: free label */ 272 | void HTS_Label_clear(HTS_Label * label); 273 | 274 | /* sstream --------------------------------------------------------- */ 275 | 276 | /* HTS_SStreamSet_initialize: initialize state stream set */ 277 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss); 278 | 279 | /* HTS_SStreamSet_create: parse label and determine state duration */ 280 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw); 281 | 282 | /* HTS_SStreamSet_get_nstream: get number of streams */ 283 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss); 284 | 285 | /* HTS_SStreamSet_get_vector_length: get vector length */ 286 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index); 287 | 288 | /* HTS_SStreamSet_is_msd: get MSD flag */ 289 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index); 290 | 291 | /* HTS_SStreamSet_get_total_state: get total number of states */ 292 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss); 293 | 294 | /* HTS_SStreamSet_get_total_frame: get total number of frames */ 295 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss); 296 | 297 | /* HTS_SStreamSet_get_msd: get msd parameter */ 298 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index); 299 | 300 | /* HTS_SStreamSet_get_window_size: get dynamic window size */ 301 | size_t 
HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index); 302 | 303 | /* HTS_SStreamSet_get_window_left_width: get left width of dynamic window */ 304 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index); 305 | 306 | /* HTS_SStreamSet_get_window_right_width: get right width of dynamic window */ 307 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index); 308 | 309 | /* HTS_SStreamSet_get_window_coefficient: get coefficient of dynamic window */ 310 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index); 311 | 312 | /* HTS_SStreamSet_get_window_max_width: get max width of dynamic window */ 313 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index); 314 | 315 | /* HTS_SStreamSet_use_gv: get GV flag */ 316 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index); 317 | 318 | /* HTS_SStreamSet_get_duration: get state duration */ 319 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index); 320 | 321 | /* HTS_SStreamSet_get_mean: get mean parameter */ 322 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index); 323 | 324 | /* HTS_SStreamSet_set_mean: set mean parameter */ 325 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f); 326 | 327 | /* HTS_SStreamSet_get_vari: get variance parameter */ 328 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index); 329 | 330 | /* HTS_SStreamSet_set_vari: set variance parameter */ 331 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f); 332 | 333 | /* HTS_SStreamSet_get_gv_mean: get GV mean 
parameter */ 334 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index); 335 | 336 | /* HTS_SStreamSet_get_gv_vari: get GV variance parameter */ 337 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index); 338 | 339 | /* HTS_SStreamSet_set_gv_switch: set GV switch */ 340 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i); 341 | 342 | /* HTS_SStreamSet_get_gv_switch: get GV switch */ 343 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index); 344 | 345 | /* HTS_SStreamSet_clear: free state stream set */ 346 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss); 347 | 348 | /* pstream --------------------------------------------------------- */ 349 | 350 | /* check variance in finv() */ 351 | #define INFTY ((double) 1.0e+38) 352 | #define INFTY2 ((double) 1.0e+19) 353 | #define INVINF ((double) 1.0e-38) 354 | #define INVINF2 ((double) 1.0e-19) 355 | 356 | /* GV */ 357 | #define STEPINIT 0.1 358 | #define STEPDEC 0.5 359 | #define STEPINC 1.2 360 | #define W1 1.0 361 | #define W2 1.0 362 | #define GV_MAX_ITERATION 5 363 | 364 | /* HTS_PStreamSet_initialize: initialize parameter stream set */ 365 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss); 366 | 367 | /* HTS_PStreamSet_create: parameter generation using GV weight */ 368 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight); 369 | 370 | /* HTS_PStreamSet_get_nstream: get number of streams */ 371 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss); 372 | 373 | /* HTS_PStreamSet_get_vector_length: get features length */ 374 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index); 375 | 376 | /* HTS_PStreamSet_get_total_frame: get total number of frames */ 377 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * 
pss); 378 | 379 | /* HTS_PStreamSet_get_parameter: get parameter */ 380 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index); 381 | 382 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector */ 383 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index); 384 | 385 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */ 386 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index); 387 | 388 | /* HTS_PStreamSet_is_msd: get MSD flag */ 389 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index); 390 | 391 | /* HTS_PStreamSet_clear: free parameter stream set */ 392 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss); 393 | 394 | /* gstream --------------------------------------------------------- */ 395 | 396 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */ 397 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss); 398 | 399 | /* HTS_GStreamSet_create: generate speech */ 400 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio); 401 | 402 | /* HTS_GStreamSet_get_total_nsamples: get total number of samples */ 403 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss); 404 | 405 | /* HTS_GStreamSet_get_total_frame: get total number of frames */ 406 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss); 407 | 408 | /* HTS_GStreamSet_get_vector_length: get features length */ 409 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index); 410 | 411 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */ 412 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index); 413 | 414 | /* 
HTS_GStreamSet_get_parameter: get generated parameter */ 415 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index); 416 | 417 | /* HTS_GStreamSet_clear: free generated parameter stream set */ 418 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss); 419 | 420 | /* vocoder --------------------------------------------------------- */ 421 | 422 | #ifndef LZERO 423 | #define LZERO (-1.0e+10) /* ~log(0) */ 424 | #endif /* !LZERO */ 425 | 426 | #ifndef ZERO 427 | #define ZERO (1.0e-10) /* ~(0) */ 428 | #endif /* !ZERO */ 429 | 430 | #ifndef PI 431 | #define PI 3.14159265358979323846 432 | #endif /* !PI */ 433 | 434 | #ifndef PI2 435 | #define PI2 6.28318530717958647692 /* 2 * PI */ 436 | #endif /* !PI2 */ 437 | 438 | #define RANDMAX 32767 439 | 440 | #define SEED 1 441 | #define B0 0x00000001 442 | #define B28 0x10000000 443 | #define B31 0x80000000 444 | #define B31_ 0x7fffffff 445 | #define Z 0x00000000 446 | 447 | #ifdef HTS_EMBEDDED 448 | #define GAUSS FALSE 449 | #define PADEORDER 4 /* pade order (for MLSA filter) */ 450 | #define IRLENG 384 /* length of impulse response */ 451 | #else 452 | #define GAUSS TRUE 453 | #define PADEORDER 5 454 | #define IRLENG 576 455 | #endif /* HTS_EMBEDDED */ 456 | 457 | #define CHECK_LSP_STABILITY_MIN 0.25 458 | #define CHECK_LSP_STABILITY_NUM 4 459 | 460 | /* for MGLSA filter */ 461 | #define NORMFLG1 TRUE 462 | #define NORMFLG2 FALSE 463 | #define MULGFLG1 TRUE 464 | #define MULGFLG2 FALSE 465 | #define NGAIN FALSE 466 | 467 | /* HTS_Vocoder: structure for setting of vocoder */ 468 | typedef struct _HTS_Vocoder { 469 | HTS_Boolean is_first; 470 | size_t stage; /* Gamma=-1/stage: if stage=0 then Gamma=0 */ 471 | double gamma; /* Gamma */ 472 | HTS_Boolean use_log_gain; /* log gain flag (for LSP) */ 473 | size_t fprd; /* frame shift */ 474 | unsigned long next; /* temporary variable for random generator */ 475 | HTS_Boolean gauss; /* flag to use Gaussian noise */ 476 | double rate; 
/* sampling rate */ 477 | double pitch_of_curr_point; /* used in excitation generation */ 478 | double pitch_counter; /* used in excitation generation */ 479 | double pitch_inc_per_point; /* used in excitation generation */ 480 | double *excite_ring_buff; /* used in excitation generation */ 481 | size_t excite_buff_size; /* used in excitation generation */ 482 | size_t excite_buff_index; /* used in excitation generation */ 483 | unsigned char sw; /* switch used in random generator */ 484 | int x; /* excitation signal */ 485 | double *freqt_buff; /* used in freqt */ 486 | size_t freqt_size; /* buffer size for freqt */ 487 | double *spectrum2en_buff; /* used in spectrum2en */ 488 | size_t spectrum2en_size; /* buffer size for spectrum2en */ 489 | double r1, r2, s; /* used in random generator */ 490 | double *postfilter_buff; /* used in postfiltering */ 491 | size_t postfilter_size; /* buffer size for postfiltering */ 492 | double *c, *cc, *cinc, *d1; /* used in the MLSA/MGLSA filter */ 493 | double *lsp2lpc_buff; /* used in lsp2lpc */ 494 | size_t lsp2lpc_size; /* buffer size of lsp2lpc */ 495 | double *gc2gc_buff; /* used in gc2gc */ 496 | size_t gc2gc_size; /* buffer size for gc2gc */ 497 | } HTS_Vocoder; 498 | 499 | /* HTS_Vocoder_initialize: initialize vocoder */ 500 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod); 501 | 502 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis */ 503 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio); 504 | 505 | /* HTS_Vocoder_clear: clear vocoder */ 506 | void HTS_Vocoder_clear(HTS_Vocoder * v); 507 | 508 | HTS_HIDDEN_H_END; 509 | 510 | #endif /* !HTS_HIDDEN_H */ 511 | -------------------------------------------------------------------------------- 
/src/lib/HTS_label.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_LABEL_C 46 | #define HTS_LABEL_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_LABEL_C_START extern "C" { 50 | #define HTS_LABEL_C_END } 51 | #else 52 | #define HTS_LABEL_C_START 53 | #define HTS_LABEL_C_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_LABEL_C_START; 57 | 58 | #include /* for atof() */ 59 | #include /* for isgraph(),isdigit() */ 60 | 61 | /* hts_engine libraries */ 62 | #include "HTS_hidden.h" 63 | 64 | static HTS_Boolean isdigit_string(char *str) 65 | { 66 | int i; 67 | 68 | if (sscanf(str, "%d", &i) == 1) 69 | return TRUE; 70 | else 71 | return FALSE; 72 | } 73 | 74 | /* HTS_Label_initialize: initialize label */ 75 | void HTS_Label_initialize(HTS_Label * label) 76 | { 77 | label->head = NULL; 78 | label->size = 0; 79 | } 80 | 81 | /* HTS_Label_check_time: check label */ 82 | static void HTS_Label_check_time(HTS_Label * label) 83 | { 84 | HTS_LabelString *lstring = label->head; 85 | HTS_LabelString *next = NULL; 86 | 87 | if (lstring) 88 | lstring->start = 0.0; 89 | while (lstring) { 90 | next = lstring->next; 91 | if (!next) 92 | break; 93 | if (lstring->end < 0.0 && next->start >= 0.0) 94 | lstring->end = next->start; 95 | else if (lstring->end >= 0.0 && next->start < 0.0) 96 | next->start = lstring->end; 97 | if (lstring->start < 0.0) 98 | lstring->start 
= -1.0; 99 | if (lstring->end < 0.0) 100 | lstring->end = -1.0; 101 | lstring = next; 102 | } 103 | } 104 | 105 | /* HTS_Label_load: load label */ 106 | static void HTS_Label_load(HTS_Label * label, size_t sampling_rate, size_t fperiod, HTS_File * fp) 107 | { 108 | char buff[HTS_MAXBUFLEN]; 109 | HTS_LabelString *lstring = NULL; 110 | double start, end; 111 | const double rate = (double) sampling_rate / ((double) fperiod * 1e+7); 112 | 113 | if (label->head || label->size != 0) { 114 | HTS_error(1, "HTS_Label_load_from_fp: label is not initialized.\n"); 115 | return; 116 | } 117 | 118 | /* parse label file */ 119 | while (HTS_get_token_from_fp(fp, buff)) { 120 | if (!isgraph((int) buff[0])) 121 | break; 122 | label->size++; 123 | 124 | if (lstring) { 125 | lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString)); 126 | lstring = lstring->next; 127 | } else { /* first time */ 128 | lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString)); 129 | label->head = lstring; 130 | } 131 | if (isdigit_string(buff)) { /* has frame infomation */ 132 | start = atof(buff); 133 | HTS_get_token_from_fp(fp, buff); 134 | end = atof(buff); 135 | HTS_get_token_from_fp(fp, buff); 136 | lstring->start = rate * start; 137 | lstring->end = rate * end; 138 | } else { 139 | lstring->start = -1.0; 140 | lstring->end = -1.0; 141 | } 142 | lstring->next = NULL; 143 | lstring->name = HTS_strdup(buff); 144 | } 145 | HTS_Label_check_time(label); 146 | } 147 | 148 | /* HTS_Label_load_from_fn: load label from file name */ 149 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn) 150 | { 151 | HTS_File *fp = HTS_fopen_from_fn(fn, "r"); 152 | HTS_Label_load(label, sampling_rate, fperiod, fp); 153 | HTS_fclose(fp); 154 | } 155 | 156 | /* HTS_Label_load_from_strings: load label from strings */ 157 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines) 158 | 
{ 159 | char buff[HTS_MAXBUFLEN]; 160 | HTS_LabelString *lstring = NULL; 161 | size_t i; 162 | size_t data_index; 163 | double start, end; 164 | const double rate = (double) sampling_rate / ((double) fperiod * 1e+7); 165 | 166 | if (label->head || label->size != 0) { 167 | HTS_error(1, "HTS_Label_load_from_fp: label list is not initialized.\n"); 168 | return; 169 | } 170 | /* copy label */ 171 | for (i = 0; i < num_lines; i++) { 172 | if (!isgraph((int) lines[i][0])) 173 | break; 174 | label->size++; 175 | 176 | if (lstring) { 177 | lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString)); 178 | lstring = lstring->next; 179 | } else { /* first time */ 180 | lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString)); 181 | label->head = lstring; 182 | } 183 | data_index = 0; 184 | if (isdigit_string(lines[i])) { /* has frame infomation */ 185 | HTS_get_token_from_string(lines[i], &data_index, buff); 186 | start = atof(buff); 187 | HTS_get_token_from_string(lines[i], &data_index, buff); 188 | end = atof(buff); 189 | HTS_get_token_from_string(lines[i], &data_index, buff); 190 | lstring->name = HTS_strdup(buff); 191 | lstring->start = rate * start; 192 | lstring->end = rate * end; 193 | } else { 194 | lstring->start = -1.0; 195 | lstring->end = -1.0; 196 | lstring->name = HTS_strdup(lines[i]); 197 | } 198 | lstring->next = NULL; 199 | } 200 | HTS_Label_check_time(label); 201 | } 202 | 203 | /* HTS_Label_get_size: get number of label string */ 204 | size_t HTS_Label_get_size(HTS_Label * label) 205 | { 206 | return label->size; 207 | } 208 | 209 | /* HTS_Label_get_string: get label string */ 210 | const char *HTS_Label_get_string(HTS_Label * label, size_t index) 211 | { 212 | size_t i; 213 | HTS_LabelString *lstring = label->head; 214 | 215 | for (i = 0; i < index && lstring; i++) 216 | lstring = lstring->next; 217 | if (!lstring) 218 | return NULL; 219 | return lstring->name; 220 | } 221 | 222 | /* HTS_Label_get_start_frame: get start frame 
*/ 223 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index) 224 | { 225 | size_t i; 226 | HTS_LabelString *lstring = label->head; 227 | 228 | for (i = 0; i < index && lstring; i++) 229 | lstring = lstring->next; 230 | if (!lstring) 231 | return -1.0; 232 | return lstring->start; 233 | } 234 | 235 | /* HTS_Label_get_end_frame: get end frame */ 236 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index) 237 | { 238 | size_t i; 239 | HTS_LabelString *lstring = label->head; 240 | 241 | for (i = 0; i < index && lstring; i++) 242 | lstring = lstring->next; 243 | if (!lstring) 244 | return -1.0; 245 | return lstring->end; 246 | } 247 | 248 | /* HTS_Label_clear: free label */ 249 | void HTS_Label_clear(HTS_Label * label) 250 | { 251 | HTS_LabelString *lstring, *next_lstring; 252 | 253 | for (lstring = label->head; lstring; lstring = next_lstring) { 254 | next_lstring = lstring->next; 255 | HTS_free(lstring->name); 256 | HTS_free(lstring); 257 | } 258 | HTS_Label_initialize(label); 259 | } 260 | 261 | HTS_LABEL_C_END; 262 | 263 | #endif /* !HTS_LABEL_C */ 264 | -------------------------------------------------------------------------------- /src/lib/HTS_misc.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. 
*/ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_MISC_C 46 | #define HTS_MISC_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_MISC_C_START extern "C" { 50 | #define HTS_MISC_C_END } 51 | #else 52 | #define HTS_MISC_C_START 53 | #define HTS_MISC_C_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_MISC_C_START; 57 | 58 | #include /* for exit(),calloc(),free() */ 59 | #include /* for va_list */ 60 | #include /* for strcpy(),strlen() */ 61 | 62 | /* hts_engine libraries */ 63 | #include "HTS_hidden.h" 64 | 65 | #ifdef FESTIVAL 66 | #include "EST_walloc.h" 67 | #endif /* FESTIVAL */ 68 | 69 | #define HTS_FILE 0 70 | #define HTS_DATA 1 71 | 72 | typedef struct _HTS_Data { 73 | unsigned char *data; 74 | size_t size; 75 | size_t index; 76 | } HTS_Data; 77 | 78 | /* HTS_fopen_from_fn: wrapper for fopen */ 79 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt) 80 | { 81 | HTS_File *fp = (HTS_File *) HTS_calloc(1, sizeof(HTS_File)); 82 | 83 | fp->type = HTS_FILE; 84 | fp->pointer = (void *) fopen(name, opt); 85 | 86 | if (fp->pointer == NULL) { 87 | HTS_error(0, "HTS_fopen: Cannot open %s.\n", name); 88 | HTS_free(fp); 89 | return NULL; 90 | } 91 | 92 | return fp; 93 | } 94 | 95 | /* HTS_fopen_from_fp: wrapper for fopen */ 96 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size) 97 | { 98 | if (fp == NULL || size == 0) 99 | return NULL; 100 | else if (fp->type == HTS_FILE) { 101 | HTS_Data *d; 102 | HTS_File *f; 103 | d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data)); 104 | d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char)); 105 | d->size = size; 106 | d->index = 0; 107 | if (fread(d->data, sizeof(unsigned char), size, (FILE *) fp->pointer) != size) { 108 | free(d->data); 109 | free(d); 110 | return NULL; 111 | } 112 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File)); 113 | f->type = HTS_DATA; 114 | f->pointer = (void *) d; 115 | return f; 116 | } else if (fp->type == HTS_DATA) { 117 | 
HTS_File *f; 118 | HTS_Data *tmp1, *tmp2; 119 | tmp1 = (HTS_Data *) fp->pointer; 120 | if (tmp1->index + size > tmp1->size) 121 | return NULL; 122 | tmp2 = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data)); 123 | tmp2->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char)); 124 | tmp2->size = size; 125 | tmp2->index = 0; 126 | memcpy(tmp2->data, &tmp1->data[tmp1->index], size); 127 | tmp1->index += size; 128 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File)); 129 | f->type = HTS_DATA; 130 | f->pointer = (void *) tmp2; 131 | return f; 132 | } 133 | 134 | HTS_error(0, "HTS_fopen_from_fp: Unknown file type.\n"); 135 | return NULL; 136 | } 137 | 138 | /* HTS_fopen_from_data: wrapper for fopen */ 139 | HTS_File *HTS_fopen_from_data(void *data, size_t size) 140 | { 141 | HTS_Data *d; 142 | HTS_File *f; 143 | 144 | if (data == NULL || size == 0) 145 | return NULL; 146 | 147 | d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data)); 148 | d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char)); 149 | d->size = size; 150 | d->index = 0; 151 | 152 | memcpy(d->data, data, size); 153 | 154 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File)); 155 | f->type = HTS_DATA; 156 | f->pointer = (void *) d; 157 | 158 | return f; 159 | } 160 | 161 | /* HTS_fclose: wrapper for fclose */ 162 | void HTS_fclose(HTS_File * fp) 163 | { 164 | if (fp == NULL) { 165 | return; 166 | } else if (fp->type == HTS_FILE) { 167 | if (fp->pointer != NULL) 168 | fclose((FILE *) fp->pointer); 169 | HTS_free(fp); 170 | return; 171 | } else if (fp->type == HTS_DATA) { 172 | if (fp->pointer != NULL) { 173 | HTS_Data *d = (HTS_Data *) fp->pointer; 174 | if (d->data != NULL) 175 | HTS_free(d->data); 176 | HTS_free(d); 177 | } 178 | HTS_free(fp); 179 | return; 180 | } 181 | HTS_error(0, "HTS_fclose: Unknown file type.\n"); 182 | } 183 | 184 | /* HTS_fgetc: wrapper for fgetc */ 185 | int HTS_fgetc(HTS_File * fp) 186 | { 187 | if (fp == NULL) { 188 | return EOF; 189 | } else if (fp->type == HTS_FILE) { 190 
| return fgetc((FILE *) fp->pointer); 191 | } else if (fp->type == HTS_DATA) { 192 | HTS_Data *d = (HTS_Data *) fp->pointer; 193 | if (d->size <= d->index) 194 | return EOF; 195 | return (int) d->data[d->index++]; 196 | } 197 | HTS_error(0, "HTS_fgetc: Unknown file type.\n"); 198 | return EOF; 199 | } 200 | 201 | /* HTS_feof: wrapper for feof */ 202 | int HTS_feof(HTS_File * fp) 203 | { 204 | if (fp == NULL) { 205 | return 1; 206 | } else if (fp->type == HTS_FILE) { 207 | return feof((FILE *) fp->pointer); 208 | } else if (fp->type == HTS_DATA) { 209 | HTS_Data *d = (HTS_Data *) fp->pointer; 210 | return d->size <= d->index ? 1 : 0; 211 | } 212 | HTS_error(0, "HTS_feof: Unknown file type.\n"); 213 | return 1; 214 | } 215 | 216 | /* HTS_fseek: wrapper for fseek */ 217 | int HTS_fseek(HTS_File * fp, long offset, int origin) 218 | { 219 | if (fp == NULL) { 220 | return 1; 221 | } else if (fp->type == HTS_FILE) { 222 | return fseek((FILE *) fp->pointer, offset, origin); 223 | } else if (fp->type == HTS_DATA) { 224 | HTS_Data *d = (HTS_Data *) fp->pointer; 225 | if (origin == SEEK_SET) { 226 | d->index = (size_t) offset; 227 | } else if (origin == SEEK_CUR) { 228 | d->index += offset; 229 | } else if (origin == SEEK_END) { 230 | d->index = d->size + offset; 231 | } else { 232 | return 1; 233 | } 234 | return 0; 235 | } 236 | HTS_error(0, "HTS_fseek: Unknown file type.\n"); 237 | return 1; 238 | } 239 | 240 | /* HTS_ftell: rapper for ftell */ 241 | size_t HTS_ftell(HTS_File * fp) 242 | { 243 | if (fp == NULL) { 244 | return 0; 245 | } else if (fp->type == HTS_FILE) { 246 | fpos_t pos; 247 | fgetpos((FILE *) fp->pointer, &pos); 248 | #if defined(_WIN32) || defined(__CYGWIN__) || defined(__APPLE__) || defined(__ANDROID__) 249 | return (size_t) pos; 250 | #else 251 | return (size_t) pos.__pos; 252 | #endif /* _WIN32 || __CYGWIN__ || __APPLE__ || __ANDROID__ */ 253 | } else if (fp->type == HTS_DATA) { 254 | HTS_Data *d = (HTS_Data *) fp->pointer; 255 | return d->index; 256 | 
} 257 | HTS_error(0, "HTS_ftell: Unknown file type.\n"); 258 | return 0; 259 | } 260 | 261 | /* HTS_fread: wrapper for fread */ 262 | static size_t HTS_fread(void *buf, size_t size, size_t n, HTS_File * fp) 263 | { 264 | if (fp == NULL || size == 0 || n == 0) { 265 | return 0; 266 | } 267 | if (fp->type == HTS_FILE) { 268 | return fread(buf, size, n, (FILE *) fp->pointer); 269 | } else if (fp->type == HTS_DATA) { 270 | HTS_Data *d = (HTS_Data *) fp->pointer; 271 | size_t i, length = size * n; 272 | unsigned char *c = (unsigned char *) buf; 273 | for (i = 0; i < length; i++) { 274 | if (d->index < d->size) 275 | c[i] = d->data[d->index++]; 276 | else 277 | break; 278 | } 279 | if (i == 0) 280 | return 0; 281 | else 282 | return i / size; 283 | } 284 | HTS_error(0, "HTS_fread: Unknown file type.\n"); 285 | return 0; 286 | } 287 | 288 | /* HTS_byte_swap: byte swap */ 289 | static void HTS_byte_swap(void *p, size_t size, size_t block) 290 | { 291 | char *q, tmp; 292 | size_t i, j; 293 | 294 | q = (char *) p; 295 | 296 | for (i = 0; i < block; i++) { 297 | for (j = 0; j < (size / 2); j++) { 298 | tmp = *(q + j); 299 | *(q + j) = *(q + (size - 1 - j)); 300 | *(q + (size - 1 - j)) = tmp; 301 | } 302 | q += size; 303 | } 304 | } 305 | 306 | /* HTS_fread_big_endian: fread with byteswap */ 307 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp) 308 | { 309 | size_t block = HTS_fread(buf, size, n, fp); 310 | 311 | #ifdef WORDS_LITTLEENDIAN 312 | HTS_byte_swap(buf, size, block); 313 | #endif /* WORDS_LITTLEENDIAN */ 314 | 315 | return block; 316 | } 317 | 318 | /* HTS_fread_little_endian: fread with byteswap */ 319 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp) 320 | { 321 | size_t block = HTS_fread(buf, size, n, fp); 322 | 323 | #ifdef WORDS_BIGENDIAN 324 | HTS_byte_swap(buf, size, block); 325 | #endif /* WORDS_BIGENDIAN */ 326 | 327 | return block; 328 | } 329 | 330 | /* HTS_fwrite_little_endian: fwrite with 
byteswap */ 331 | size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp) 332 | { 333 | #ifdef WORDS_BIGENDIAN 334 | HTS_byte_swap(buf, size, n * size); 335 | #endif /* WORDS_BIGENDIAN */ 336 | return fwrite(buf, size, n, fp); 337 | } 338 | 339 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */ 340 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff) 341 | { 342 | char c; 343 | size_t i; 344 | HTS_Boolean squote = FALSE, dquote = FALSE; 345 | 346 | if (fp == NULL || HTS_feof(fp)) 347 | return FALSE; 348 | c = HTS_fgetc(fp); 349 | 350 | while (c == ' ' || c == '\n') { 351 | if (HTS_feof(fp)) 352 | return FALSE; 353 | c = HTS_fgetc(fp); 354 | } 355 | 356 | if (c == '\'') { /* single quote case */ 357 | if (HTS_feof(fp)) 358 | return FALSE; 359 | c = HTS_fgetc(fp); 360 | squote = TRUE; 361 | } 362 | 363 | if (c == '\"') { /*double quote case */ 364 | if (HTS_feof(fp)) 365 | return FALSE; 366 | c = HTS_fgetc(fp); 367 | dquote = TRUE; 368 | } 369 | 370 | if (c == ',') { /*special character ',' */ 371 | strcpy(buff, ","); 372 | return TRUE; 373 | } 374 | 375 | i = 0; 376 | while (1) { 377 | buff[i++] = c; 378 | c = HTS_fgetc(fp); 379 | if (squote && c == '\'') 380 | break; 381 | if (dquote && c == '\"') 382 | break; 383 | if (!squote && !dquote) { 384 | if (c == ' ') 385 | break; 386 | if (c == '\n') 387 | break; 388 | if (HTS_feof(fp)) 389 | break; 390 | } 391 | } 392 | 393 | buff[i] = '\0'; 394 | return TRUE; 395 | } 396 | 397 | /* HTS_get_token: get token from file pointer (separators are space, tab, and line break) */ 398 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff) 399 | { 400 | char c; 401 | size_t i; 402 | 403 | if (fp == NULL || HTS_feof(fp)) 404 | return FALSE; 405 | c = HTS_fgetc(fp); 406 | while (c == ' ' || c == '\n' || c == '\t') { 407 | if (HTS_feof(fp)) 408 | return FALSE; 409 | c = HTS_fgetc(fp); 410 | if (c == EOF) 411 | return FALSE; 412 | } 413 | 414 | for (i = 0; c 
!= ' ' && c != '\n' && c != '\t';) { 415 | buff[i++] = c; 416 | if (HTS_feof(fp)) 417 | break; 418 | c = HTS_fgetc(fp); 419 | if (c == EOF) 420 | break; 421 | } 422 | 423 | buff[i] = '\0'; 424 | return TRUE; 425 | } 426 | 427 | /* HTS_get_token_with_separator: get token from file pointer with specified separator */ 428 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator) 429 | { 430 | char c; 431 | size_t i; 432 | 433 | if (fp == NULL || HTS_feof(fp)) 434 | return FALSE; 435 | c = HTS_fgetc(fp); 436 | while (c == separator) { 437 | if (HTS_feof(fp)) 438 | return FALSE; 439 | c = HTS_fgetc(fp); 440 | if (c == EOF) 441 | return FALSE; 442 | } 443 | 444 | for (i = 0; c != separator;) { 445 | buff[i++] = c; 446 | if (HTS_feof(fp)) 447 | break; 448 | c = HTS_fgetc(fp); 449 | if (c == EOF) 450 | break; 451 | } 452 | 453 | buff[i] = '\0'; 454 | return TRUE; 455 | } 456 | 457 | /* HTS_get_token_from_string: get token from string (separators are space, tab, and line break) */ 458 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff) 459 | { 460 | char c; 461 | size_t i; 462 | 463 | c = string[(*index)]; 464 | if (c == '\0') 465 | return FALSE; 466 | c = string[(*index)++]; 467 | if (c == '\0') 468 | return FALSE; 469 | while (c == ' ' || c == '\n' || c == '\t') { 470 | if (c == '\0') 471 | return FALSE; 472 | c = string[(*index)++]; 473 | } 474 | for (i = 0; c != ' ' && c != '\n' && c != '\t' && c != '\0'; i++) { 475 | buff[i] = c; 476 | c = string[(*index)++]; 477 | } 478 | 479 | buff[i] = '\0'; 480 | return TRUE; 481 | } 482 | 483 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */ 484 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator) 485 | { 486 | char c; 487 | size_t len = 0; 488 | 489 | if (str == NULL) 490 | return FALSE; 491 | 492 | c = str[(*index)]; 493 | if (c == '\0') 494 | return 
FALSE; 495 | while (c == separator) { 496 | if (c == '\0') 497 | return FALSE; 498 | (*index)++; 499 | c = str[(*index)]; 500 | } 501 | while (c != separator && c != '\0') { 502 | buff[len++] = c; 503 | (*index)++; 504 | c = str[(*index)]; 505 | } 506 | if (c != '\0') 507 | (*index)++; 508 | 509 | buff[len] = '\0'; 510 | 511 | if (len > 0) 512 | return TRUE; 513 | else 514 | return FALSE; 515 | } 516 | 517 | /* HTS_calloc: wrapper for calloc */ 518 | void *HTS_calloc(const size_t num, const size_t size) 519 | { 520 | size_t n = num * size; 521 | void *mem; 522 | 523 | if (n == 0) 524 | return NULL; 525 | 526 | #ifdef FESTIVAL 527 | mem = (void *) safe_wcalloc(n); 528 | #else 529 | mem = (void *) malloc(n); 530 | #endif /* FESTIVAL */ 531 | 532 | memset(mem, 0, n); 533 | 534 | if (mem == NULL) 535 | HTS_error(1, "HTS_calloc: Cannot allocate memory.\n"); 536 | 537 | return mem; 538 | } 539 | 540 | /* HTS_Free: wrapper for free */ 541 | void HTS_free(void *ptr) 542 | { 543 | #ifdef FESTIVAL 544 | wfree(ptr); 545 | #else 546 | free(ptr); 547 | #endif /* FESTIVAL */ 548 | } 549 | 550 | /* HTS_strdup: wrapper for strdup */ 551 | char *HTS_strdup(const char *string) 552 | { 553 | #ifdef FESTIVAL 554 | return (wstrdup(string)); 555 | #else 556 | char *buff = (char *) HTS_calloc(strlen(string) + 1, sizeof(char)); 557 | strcpy(buff, string); 558 | return buff; 559 | #endif /* FESTIVAL */ 560 | } 561 | 562 | /* HTS_alloc_matrix: allocate double matrix */ 563 | double **HTS_alloc_matrix(size_t x, size_t y) 564 | { 565 | size_t i; 566 | double **p; 567 | 568 | if (x == 0 || y == 0) 569 | return NULL; 570 | 571 | p = (double **) HTS_calloc(x, sizeof(double *)); 572 | 573 | for (i = 0; i < x; i++) 574 | p[i] = (double *) HTS_calloc(y, sizeof(double)); 575 | return p; 576 | } 577 | 578 | /* HTS_free_matrix: free double matrix */ 579 | void HTS_free_matrix(double **p, size_t x) 580 | { 581 | size_t i; 582 | 583 | for (i = 0; i < x; i++) 584 | HTS_free(p[i]); 585 | HTS_free(p); 586 
| } 587 | 588 | /* HTS_error: output error message */ 589 | void HTS_error(int error, const char *message, ...) 590 | { 591 | va_list arg; 592 | 593 | fflush(stdout); 594 | fflush(stderr); 595 | 596 | if (error > 0) 597 | fprintf(stderr, "\nError: "); 598 | else 599 | fprintf(stderr, "\nWarning: "); 600 | 601 | va_start(arg, message); 602 | vfprintf(stderr, message, arg); 603 | va_end(arg); 604 | 605 | fflush(stderr); 606 | 607 | if (error > 0) 608 | exit(error); 609 | } 610 | 611 | HTS_MISC_C_END; 612 | 613 | #endif /* !HTS_MISC_C */ 614 | -------------------------------------------------------------------------------- /src/lib/HTS_pstream.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. 
*/ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. 
*/ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_PSTREAM_C 46 | #define HTS_PSTREAM_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_PSTREAM_C_START extern "C" { 50 | #define HTS_PSTREAM_C_END } 51 | #else 52 | #define HTS_PSTREAM_C_START 53 | #define HTS_PSTREAM_C_END 54 | #endif /* __CPLUSPLUS */ 55 | 56 | HTS_PSTREAM_C_START; 57 | 58 | #include /* for sqrt() */ 59 | 60 | /* hts_engine libraries */ 61 | #include "HTS_hidden.h" 62 | 63 | /* HTS_finv: calculate 1.0/variance function */ 64 | static double HTS_finv(const double x) 65 | { 66 | if (x >= INFTY2) 67 | return 0.0; 68 | if (x <= -INFTY2) 69 | return 0.0; 70 | if (x <= INVINF2 && x >= 0) 71 | return INFTY; 72 | if (x >= -INVINF2 && x < 0) 73 | return -INFTY; 74 | 75 | return (1.0 / x); 76 | } 77 | 78 | /* HTS_PStream_calc_wuw_and_wum: calcurate W'U^{-1}W and W'U^{-1}M */ 79 | static void HTS_PStream_calc_wuw_and_wum(HTS_PStream * pst, size_t m) 80 | { 81 | size_t t, i, j; 82 | int shift; 83 | double wu; 84 | 85 | for (t = 0; t < pst->length; t++) { 86 | /* initialize */ 87 | pst->sm.wum[t] = 0.0; 88 | for (i = 0; i < pst->width; i++) 89 | pst->sm.wuw[t][i] = 0.0; 90 | 91 | /* calc WUW & WUM */ 92 | for (i = 0; i < pst->win_size; i++) 93 | for (shift = pst->win_l_width[i]; shift <= pst->win_r_width[i]; shift++) 94 | if (((int) t + shift >= 0) && ((int) t + shift < pst->length) && (pst->win_coefficient[i][-shift] != 0.0)) { 95 | wu = pst->win_coefficient[i][-shift] * pst->sm.ivar[t + shift][i * pst->vector_length + m]; 96 | pst->sm.wum[t] += wu * pst->sm.mean[t + shift][i * pst->vector_length + m]; 97 | for (j = 0; (j < pst->width) && (t + j < pst->length); j++) 98 | if (((int) j <= pst->win_r_width[i] + shift) && (pst->win_coefficient[i][j - shift] != 0.0)) 99 | pst->sm.wuw[t][j] += wu * pst->win_coefficient[i][j - shift]; 100 | } 101 | } 102 | } 103 | 104 | 105 | /* HTS_PStream_ldl_factorization: Factorize W'*U^{-1}*W to L*D*L' (L: lower triangular, D: 
diagonal) */ 106 | static void HTS_PStream_ldl_factorization(HTS_PStream * pst) 107 | { 108 | size_t t, i, j; 109 | 110 | for (t = 0; t < pst->length; t++) { 111 | for (i = 1; (i < pst->width) && (t >= i); i++) 112 | pst->sm.wuw[t][0] -= pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][0]; 113 | 114 | for (i = 1; i < pst->width; i++) { 115 | for (j = 1; (i + j < pst->width) && (t >= j); j++) 116 | pst->sm.wuw[t][i] -= pst->sm.wuw[t - j][j] * pst->sm.wuw[t - j][i + j] * pst->sm.wuw[t - j][0]; 117 | pst->sm.wuw[t][i] /= pst->sm.wuw[t][0]; 118 | } 119 | } 120 | } 121 | 122 | /* HTS_PStream_forward_substitution: forward subtitution for mlpg */ 123 | static void HTS_PStream_forward_substitution(HTS_PStream * pst) 124 | { 125 | size_t t, i; 126 | 127 | for (t = 0; t < pst->length; t++) { 128 | pst->sm.g[t] = pst->sm.wum[t]; 129 | for (i = 1; (i < pst->width) && (t >= i); i++) 130 | pst->sm.g[t] -= pst->sm.wuw[t - i][i] * pst->sm.g[t - i]; 131 | } 132 | } 133 | 134 | /* HTS_PStream_backward_substitution: backward subtitution for mlpg */ 135 | static void HTS_PStream_backward_substitution(HTS_PStream * pst, size_t m) 136 | { 137 | size_t rev, t, i; 138 | 139 | for (rev = 0; rev < pst->length; rev++) { 140 | t = pst->length - 1 - rev; 141 | pst->par[t][m] = pst->sm.g[t] / pst->sm.wuw[t][0]; 142 | for (i = 1; (i < pst->width) && (t + i < pst->length); i++) 143 | pst->par[t][m] -= pst->sm.wuw[t][i] * pst->par[t + i][m]; 144 | } 145 | } 146 | 147 | /* HTS_PStream_calc_gv: subfunction for mlpg using GV */ 148 | static void HTS_PStream_calc_gv(HTS_PStream * pst, size_t m, double *mean, double *vari) 149 | { 150 | size_t t; 151 | 152 | *mean = 0.0; 153 | for (t = 0; t < pst->length; t++) 154 | if (pst->gv_switch[t]) 155 | *mean += pst->par[t][m]; 156 | *mean /= pst->gv_length; 157 | *vari = 0.0; 158 | for (t = 0; t < pst->length; t++) 159 | if (pst->gv_switch[t]) 160 | *vari += (pst->par[t][m] - *mean) * (pst->par[t][m] - *mean); 161 | *vari /= pst->gv_length; 
162 | } 163 | 164 | /* HTS_PStream_conv_gv: subfunction for mlpg using GV */ 165 | static void HTS_PStream_conv_gv(HTS_PStream * pst, size_t m) 166 | { 167 | size_t t; 168 | double ratio; 169 | double mean; 170 | double vari; 171 | 172 | HTS_PStream_calc_gv(pst, m, &mean, &vari); 173 | ratio = sqrt(pst->gv_mean[m] / vari); 174 | for (t = 0; t < pst->length; t++) 175 | if (pst->gv_switch[t]) 176 | pst->par[t][m] = ratio * (pst->par[t][m] - mean) + mean; 177 | } 178 | 179 | /* HTS_PStream_calc_derivative: subfunction for mlpg using GV */ 180 | static double HTS_PStream_calc_derivative(HTS_PStream * pst, size_t m) 181 | { 182 | size_t t, i; 183 | double mean; 184 | double vari; 185 | double dv; 186 | double h; 187 | double gvobj; 188 | double hmmobj; 189 | double w = 1.0 / (pst->win_size * pst->length); 190 | 191 | HTS_PStream_calc_gv(pst, m, &mean, &vari); 192 | gvobj = -0.5 * W2 * vari * pst->gv_vari[m] * (vari - 2.0 * pst->gv_mean[m]); 193 | dv = -2.0 * pst->gv_vari[m] * (vari - pst->gv_mean[m]) / pst->length; 194 | 195 | for (t = 0; t < pst->length; t++) { 196 | pst->sm.g[t] = pst->sm.wuw[t][0] * pst->par[t][m]; 197 | for (i = 1; i < pst->width; i++) { 198 | if (t + i < pst->length) 199 | pst->sm.g[t] += pst->sm.wuw[t][i] * pst->par[t + i][m]; 200 | if (t + 1 > i) 201 | pst->sm.g[t] += pst->sm.wuw[t - i][i] * pst->par[t - i][m]; 202 | } 203 | } 204 | 205 | for (t = 0, hmmobj = 0.0; t < pst->length; t++) { 206 | hmmobj += W1 * w * pst->par[t][m] * (pst->sm.wum[t] - 0.5 * pst->sm.g[t]); 207 | h = -W1 * w * pst->sm.wuw[t][1 - 1] - W2 * 2.0 / (pst->length * pst->length) * ((pst->length - 1) * pst->gv_vari[m] * (vari - pst->gv_mean[m]) + 2.0 * pst->gv_vari[m] * (pst->par[t][m] - mean) * (pst->par[t][m] - mean)); 208 | if (pst->gv_switch[t]) 209 | pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]) + W2 * dv * (pst->par[t][m] - mean)); 210 | else 211 | pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t])); 212 | } 213 | 214 | return 
(-(hmmobj + gvobj)); 215 | } 216 | 217 | /* HTS_PStream_gv_parmgen: function for mlpg using GV */ 218 | static void HTS_PStream_gv_parmgen(HTS_PStream * pst, size_t m) 219 | { 220 | size_t t, i; 221 | double step = STEPINIT; 222 | double prev = 0.0; 223 | double obj; 224 | 225 | if (pst->gv_length == 0) 226 | return; 227 | 228 | HTS_PStream_conv_gv(pst, m); 229 | if (GV_MAX_ITERATION > 0) { 230 | HTS_PStream_calc_wuw_and_wum(pst, m); 231 | for (i = 1; i <= GV_MAX_ITERATION; i++) { 232 | obj = HTS_PStream_calc_derivative(pst, m); 233 | if (i > 1) { 234 | if (obj > prev) 235 | step *= STEPDEC; 236 | if (obj < prev) 237 | step *= STEPINC; 238 | } 239 | for (t = 0; t < pst->length; t++) 240 | pst->par[t][m] += step * pst->sm.g[t]; 241 | prev = obj; 242 | } 243 | } 244 | } 245 | 246 | /* HTS_PStream_mlpg: generate sequence of speech parameter vector maximizing its output probability for given pdf sequence */ 247 | static void HTS_PStream_mlpg(HTS_PStream * pst) 248 | { 249 | size_t m; 250 | 251 | if (pst->length == 0) 252 | return; 253 | 254 | for (m = 0; m < pst->vector_length; m++) { 255 | HTS_PStream_calc_wuw_and_wum(pst, m); 256 | HTS_PStream_ldl_factorization(pst); /* LDL factorization */ 257 | HTS_PStream_forward_substitution(pst); /* forward substitution */ 258 | HTS_PStream_backward_substitution(pst, m); /* backward substitution */ 259 | if (pst->gv_length > 0) 260 | HTS_PStream_gv_parmgen(pst, m); 261 | } 262 | } 263 | 264 | /* HTS_PStreamSet_initialize: initialize parameter stream set */ 265 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss) 266 | { 267 | pss->pstream = NULL; 268 | pss->nstream = 0; 269 | pss->total_frame = 0; 270 | } 271 | 272 | /* HTS_PStreamSet_create: parameter generation using GV weight */ 273 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight) 274 | { 275 | size_t i, j, k, l, m; 276 | int shift; 277 | size_t frame, msd_frame, state; 278 | 279 | HTS_PStream *pst; 
280 | HTS_Boolean not_bound; 281 | 282 | if (pss->nstream != 0) { 283 | HTS_error(1, "HTS_PstreamSet_create: HTS_PStreamSet should be clear.\n"); 284 | return FALSE; 285 | } 286 | 287 | /* initialize */ 288 | pss->nstream = HTS_SStreamSet_get_nstream(sss); 289 | pss->pstream = (HTS_PStream *) HTS_calloc(pss->nstream, sizeof(HTS_PStream)); 290 | pss->total_frame = HTS_SStreamSet_get_total_frame(sss); 291 | 292 | /* create */ 293 | for (i = 0; i < pss->nstream; i++) { 294 | pst = &pss->pstream[i]; 295 | if (HTS_SStreamSet_is_msd(sss, i)) { /* for MSD */ 296 | pst->length = 0; 297 | for (state = 0; state < HTS_SStreamSet_get_total_state(sss); state++) 298 | if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i]) 299 | pst->length += HTS_SStreamSet_get_duration(sss, state); 300 | pst->msd_flag = (HTS_Boolean *) HTS_calloc(pss->total_frame, sizeof(HTS_Boolean)); 301 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) 302 | if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i]) 303 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) { 304 | pst->msd_flag[frame] = TRUE; 305 | frame++; 306 | } else 307 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) { 308 | pst->msd_flag[frame] = FALSE; 309 | frame++; 310 | } 311 | } else { /* for non MSD */ 312 | pst->length = pss->total_frame; 313 | pst->msd_flag = NULL; 314 | } 315 | pst->vector_length = HTS_SStreamSet_get_vector_length(sss, i); 316 | pst->width = HTS_SStreamSet_get_window_max_width(sss, i) * 2 + 1; /* band width of R */ 317 | pst->win_size = HTS_SStreamSet_get_window_size(sss, i); 318 | if (pst->length > 0) { 319 | pst->sm.mean = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size); 320 | pst->sm.ivar = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size); 321 | pst->sm.wum = (double *) HTS_calloc(pst->length, sizeof(double)); 322 | pst->sm.wuw = HTS_alloc_matrix(pst->length, pst->width); 323 | pst->sm.g = (double *) 
HTS_calloc(pst->length, sizeof(double)); 324 | pst->par = HTS_alloc_matrix(pst->length, pst->vector_length); 325 | } 326 | /* copy dynamic window */ 327 | pst->win_l_width = (int *) HTS_calloc(pst->win_size, sizeof(int)); 328 | pst->win_r_width = (int *) HTS_calloc(pst->win_size, sizeof(int)); 329 | pst->win_coefficient = (double **) HTS_calloc(pst->win_size, sizeof(double)); 330 | for (j = 0; j < pst->win_size; j++) { 331 | pst->win_l_width[j] = HTS_SStreamSet_get_window_left_width(sss, i, j); 332 | pst->win_r_width[j] = HTS_SStreamSet_get_window_right_width(sss, i, j); 333 | if (pst->win_l_width[j] + pst->win_r_width[j] == 0) 334 | pst->win_coefficient[j] = (double *) 335 | HTS_calloc(-2 * pst->win_l_width[j] + 1, sizeof(double)); 336 | else 337 | pst->win_coefficient[j] = (double *) 338 | HTS_calloc(-2 * pst->win_l_width[j], sizeof(double)); 339 | pst->win_coefficient[j] -= pst->win_l_width[j]; 340 | for (shift = pst->win_l_width[j]; shift <= pst->win_r_width[j]; shift++) 341 | pst->win_coefficient[j][shift] = HTS_SStreamSet_get_window_coefficient(sss, i, j, shift); 342 | } 343 | /* copy GV */ 344 | if (HTS_SStreamSet_use_gv(sss, i)) { 345 | pst->gv_mean = (double *) HTS_calloc(pst->vector_length, sizeof(double)); 346 | pst->gv_vari = (double *) HTS_calloc(pst->vector_length, sizeof(double)); 347 | for (j = 0; j < pst->vector_length; j++) { 348 | pst->gv_mean[j] = HTS_SStreamSet_get_gv_mean(sss, i, j) * gv_weight[i]; 349 | pst->gv_vari[j] = HTS_SStreamSet_get_gv_vari(sss, i, j); 350 | } 351 | pst->gv_switch = (HTS_Boolean *) HTS_calloc(pst->length, sizeof(HTS_Boolean)); 352 | if (HTS_SStreamSet_is_msd(sss, i)) { /* for MSD */ 353 | for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) 354 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++, frame++) 355 | if (pst->msd_flag[frame]) 356 | pst->gv_switch[msd_frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state); 357 | } else { /* for non MSD */ 358 | for (state 
= 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) 359 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) 360 | pst->gv_switch[frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state); 361 | } 362 | for (j = 0, pst->gv_length = 0; j < pst->length; j++) 363 | if (pst->gv_switch[j]) 364 | pst->gv_length++; 365 | } else { 366 | pst->gv_switch = NULL; 367 | pst->gv_length = 0; 368 | pst->gv_mean = NULL; 369 | pst->gv_vari = NULL; 370 | } 371 | /* copy pdfs */ 372 | if (HTS_SStreamSet_is_msd(sss, i)) { /* for MSD */ 373 | for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) { 374 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) { 375 | if (pst->msd_flag[frame]) { 376 | /* check current frame is MSD boundary or not */ 377 | for (k = 0; k < pst->win_size; k++) { 378 | not_bound = TRUE; 379 | for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++) 380 | if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift || !pst->msd_flag[frame + shift]) { 381 | not_bound = FALSE; 382 | break; 383 | } 384 | for (l = 0; l < pst->vector_length; l++) { 385 | m = pst->vector_length * k + l; 386 | pst->sm.mean[msd_frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m); 387 | if (not_bound || k == 0) 388 | pst->sm.ivar[msd_frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m)); 389 | else 390 | pst->sm.ivar[msd_frame][m] = 0.0; 391 | } 392 | } 393 | msd_frame++; 394 | } 395 | frame++; 396 | } 397 | } 398 | } else { /* for non MSD */ 399 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) { 400 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) { 401 | for (k = 0; k < pst->win_size; k++) { 402 | not_bound = TRUE; 403 | for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++) 404 | if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift) { 405 | not_bound = FALSE; 406 | break; 407 | 
} 408 | for (l = 0; l < pst->vector_length; l++) { 409 | m = pst->vector_length * k + l; 410 | pst->sm.mean[frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m); 411 | if (not_bound || k == 0) 412 | pst->sm.ivar[frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m)); 413 | else 414 | pst->sm.ivar[frame][m] = 0.0; 415 | } 416 | } 417 | frame++; 418 | } 419 | } 420 | } 421 | /* parameter generation */ 422 | HTS_PStream_mlpg(pst); 423 | } 424 | 425 | return TRUE; 426 | } 427 | 428 | /* HTS_PStreamSet_get_nstream: get number of streams */ 429 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss) 430 | { 431 | return pss->nstream; 432 | } 433 | 434 | /* HTS_PStreamSet_get_vector_length: get feature vector length of the given stream */ 435 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index) 436 | { 437 | return pss->pstream[stream_index].vector_length; 438 | } 439 | 440 | /* HTS_PStreamSet_get_total_frame: get total number of frames */ 441 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss) 442 | { 443 | return pss->total_frame; 444 | } 445 | 446 | /* HTS_PStreamSet_get_parameter: get one parameter value, par[frame_index][vector_index], of the given stream (indices are not range checked) */ 447 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index) 448 | { 449 | return pss->pstream[stream_index].par[frame_index][vector_index]; 450 | } 451 | 452 | /* HTS_PStreamSet_get_parameter_vector: get pointer to the parameter vector par[frame_index] of the given stream (the internal row, not a copy) */ 453 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index) 454 | { 455 | return pss->pstream[stream_index].par[frame_index]; 456 | } 457 | 458 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame (msd_flag is indexed without a NULL check) */ 459 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index) 460 | { 461 | return pss->pstream[stream_index].msd_flag[frame_index]; 462 | } 463 | 464 | /* HTS_PStreamSet_is_msd: get MSD flag (TRUE when the msd_flag array of the stream is non-NULL) */ 465 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, 
size_t stream_index) 466 | { 467 | return pss->pstream[stream_index].msd_flag ? TRUE : FALSE; 468 | } 469 | 470 | /* HTS_PStreamSet_clear: free parameter stream set (every non-NULL per-stream buffer, then the stream array) and reset pss with HTS_PStreamSet_initialize */ 471 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss) 472 | { 473 | size_t i, j; 474 | HTS_PStream *pstream; 475 | 476 | if (pss->pstream) { 477 | for (i = 0; i < pss->nstream; i++) { 478 | pstream = &pss->pstream[i]; 479 | if (pstream->sm.wum) 480 | HTS_free(pstream->sm.wum); 481 | if (pstream->sm.g) 482 | HTS_free(pstream->sm.g); 483 | if (pstream->sm.wuw) 484 | HTS_free_matrix(pstream->sm.wuw, pstream->length); 485 | if (pstream->sm.ivar) 486 | HTS_free_matrix(pstream->sm.ivar, pstream->length); 487 | if (pstream->sm.mean) 488 | HTS_free_matrix(pstream->sm.mean, pstream->length); 489 | if (pstream->par) 490 | HTS_free_matrix(pstream->par, pstream->length); 491 | if (pstream->msd_flag) 492 | HTS_free(pstream->msd_flag); 493 | if (pstream->win_coefficient) { 494 | for (j = 0; j < pstream->win_size; j++) { 495 | /* the stored pointer is offset by -win_l_width[j]; add the width back to recover the allocated address before freeing */ pstream->win_coefficient[j] += pstream->win_l_width[j]; 496 | HTS_free(pstream->win_coefficient[j]); 497 | } 498 | } 499 | if (pstream->gv_mean) 500 | HTS_free(pstream->gv_mean); 501 | if (pstream->gv_vari) 502 | HTS_free(pstream->gv_vari); 503 | if (pstream->win_coefficient) 504 | HTS_free(pstream->win_coefficient); 505 | if (pstream->win_l_width) 506 | HTS_free(pstream->win_l_width); 507 | if (pstream->win_r_width) 508 | HTS_free(pstream->win_r_width); 509 | if (pstream->gv_switch) 510 | HTS_free(pstream->gv_switch); 511 | } 512 | HTS_free(pss->pstream); 513 | } 514 | /* always reset the set, even when nothing was allocated */ HTS_PStreamSet_initialize(pss); 515 | } 516 | 517 | HTS_PSTREAM_C_END; 518 | 519 | #endif /* !HTS_PSTREAM_C */ 520 | -------------------------------------------------------------------------------- /src/lib/HTS_sstream.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- */ 2 | /* The HMM-Based Speech Synthesis Engine 
"hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_SSTREAM_C 46 | #define HTS_SSTREAM_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_SSTREAM_C_START extern "C" { 50 | #define HTS_SSTREAM_C_END } 51 | #else 52 | #define HTS_SSTREAM_C_START 53 | #define HTS_SSTREAM_C_END 54 | #endif /* __cplusplus */ 55 | 56 | HTS_SSTREAM_C_START; 57 | 58 | #include <stdlib.h> 59 | #include <math.h> 60 | 61 | /* hts_engine libraries */ 62 | #include "HTS_hidden.h" 63 | 64 | /* HTS_set_default_duration: set default duration from state duration probability distribution */ 65 | static double HTS_set_default_duration(size_t * duration, double *mean, double *vari, size_t size) 66 | { 67 | size_t i; 68 | double temp; 69 | size_t sum = 0; 70 | 71 | for (i = 0; i < size; i++) { 72 | temp = mean[i] + 0.5; 73 | if (temp < 1.0) 74 | duration[i] = 1; 75 | else 76 | duration[i] = (size_t) temp; 77 | sum += duration[i]; 78 | } 79 | 80 | return (double) sum; 81 | } 82 | 83 | /* HTS_set_specified_duration: set duration from state duration probability distribution and specified frame length */ 84 | static double HTS_set_specified_duration(size_t * duration, double *mean, double *vari, size_t size, double frame_length) 85 | { 86 | size_t i; 87 | int j; 88 | double temp1, temp2; 89 | double rho = 0.0; 90 | size_t sum = 0; 91 | size_t target_length; 92 | 93 | /* get the target 
frame length */ 94 | if (frame_length + 0.5 < 1.0) 95 | target_length = 1; 96 | else 97 | target_length = (size_t) (frame_length + 0.5); 98 | 99 | /* check the specified duration: with no more target frames than states, every state gets one frame and size is returned */ 100 | if (target_length <= size) { 101 | if (target_length < size) 102 | HTS_error(-1, "HTS_set_specified_duration: Specified frame length is too short.\n"); 103 | for (i = 0; i < size; i++) 104 | duration[i] = 1; 105 | return (double) size; 106 | } 107 | 108 | /* RHO calculation: rho = (target_length - sum of means) / sum of variances. NOTE(review): the variance sum is not checked for zero before the division */ 109 | temp1 = 0.0; 110 | temp2 = 0.0; 111 | for (i = 0; i < size; i++) { 112 | temp1 += mean[i]; 113 | temp2 += vari[i]; 114 | } 115 | rho = ((double) target_length - temp1) / temp2; 116 | 117 | /* first estimation: mean + rho * vari rounded to nearest, at least 1 frame per state */ 118 | for (i = 0; i < size; i++) { 119 | temp1 = mean[i] + rho * vari[i] + 0.5; 120 | if (temp1 < 1.0) 121 | duration[i] = 1; 122 | else 123 | duration[i] = (size_t) temp1; 124 | sum += duration[i]; 125 | } 126 | 127 | /* loop estimation: adjust one state by one frame per pass until the durations sum to target_length */ 128 | while (target_length != sum) { 129 | /* search flexible state and modify its duration: pick the state whose changed duration keeps (duration - mean) / vari closest to rho. NOTE(review): vari[i] is used as a divisor without a zero check */ 130 | if (target_length > sum) { 131 | j = -1; 132 | for (i = 0; i < size; i++) { 133 | temp2 = fabs(rho - ((double) duration[i] + 1 - mean[i]) / vari[i]); 134 | if (j < 0 || temp1 > temp2) { 135 | j = i; 136 | temp1 = temp2; 137 | } 138 | } 139 | sum++; 140 | duration[j]++; 141 | } else { 142 | j = -1; 143 | for (i = 0; i < size; i++) { 144 | /* only states longer than one frame may be shortened */ if (duration[i] > 1) { 145 | temp2 = fabs(rho - ((double) duration[i] - 1 - mean[i]) / vari[i]); 146 | if (j < 0 || temp1 > temp2) { 147 | j = i; 148 | temp1 = temp2; 149 | } 150 | } 151 | } 152 | sum--; 153 | duration[j]--; 154 | } 155 | } 156 | 157 | return (double) target_length; 158 | } 159 | 160 | /* HTS_SStreamSet_initialize: initialize state stream set (counters to 0, pointers to NULL; nothing is freed) */ 161 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss) 162 | { 163 | sss->nstream = 0; 164 | sss->nstate = 0; 165 | sss->sstream = NULL; 166 | sss->duration = NULL; 167 | sss->total_state = 0; 168 | sss->total_frame = 0; 169 | } 170 | 171 | /* 
HTS_SStreamSet_create: parse label and determine state duration */ 172 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw) 173 | { /* Returns FALSE, before anything is allocated, when a checked interpolation weight set sums to 0.0; otherwise fills sss and returns TRUE at the end. */ 174 | size_t i, j, k; 175 | double temp; 176 | int shift; 177 | size_t state; 178 | HTS_SStream *sst; 179 | double *duration_mean, *duration_vari; 180 | double frame_length; 181 | size_t next_time; 182 | size_t next_state; 183 | 184 | /* check interpolation weights: a set summing to 0.0 is rejected, any other set not summing to 1.0 is rescaled in place (the caller arrays are modified) */ 185 | for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++) 186 | temp += duration_iw[i]; 187 | if (temp == 0.0) { 188 | return FALSE; 189 | } else if (temp != 1.0) { 190 | for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++) 191 | if (duration_iw[i] != 0.0) 192 | duration_iw[i] /= temp; 193 | } 194 | 195 | /* same check per stream for parameter_iw[voice][stream] and, when the stream uses GV, gv_iw[voice][stream] */ for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) { 196 | for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++) 197 | temp += parameter_iw[j][i]; 198 | if (temp == 0.0) { 199 | return FALSE; 200 | } else if (temp != 1.0) { 201 | for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) 202 | if (parameter_iw[j][i] != 0.0) 203 | parameter_iw[j][i] /= temp; 204 | } 205 | if (HTS_ModelSet_use_gv(ms, i)) { 206 | for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++) 207 | temp += gv_iw[j][i]; 208 | if (temp == 0.0) 209 | return FALSE; 210 | else if (temp != 1.0) 211 | for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) 212 | if (gv_iw[j][i] != 0.0) 213 | gv_iw[j][i] /= temp; 214 | } 215 | } 216 | 217 | /* initialize state sequence: total_state = number of labels * number of states per model */ 218 | sss->nstate = HTS_ModelSet_get_nstate(ms); 219 | sss->nstream = HTS_ModelSet_get_nstream(ms); 220 | sss->total_frame = 0; 221 | sss->total_state = HTS_Label_get_size(label) * sss->nstate; 222 | sss->duration = (size_t *) HTS_calloc(sss->total_state, sizeof(size_t)); 223 | sss->sstream = (HTS_SStream *) HTS_calloc(sss->nstream, sizeof(HTS_SStream)); 224 | for (i = 0; i < 
sss->nstream; i++) { 225 | sst = &sss->sstream[i]; 226 | sst->vector_length = HTS_ModelSet_get_vector_length(ms, i); 227 | sst->mean = (double **) HTS_calloc(sss->total_state, sizeof(double *)); 228 | sst->vari = (double **) HTS_calloc(sss->total_state, sizeof(double *)); 229 | if (HTS_ModelSet_is_msd(ms, i)) 230 | sst->msd = (double *) HTS_calloc(sss->total_state, sizeof(double)); 231 | else 232 | sst->msd = NULL; 233 | for (j = 0; j < sss->total_state; j++) { 234 | sst->mean[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double)); 235 | sst->vari[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double)); 236 | } 237 | if (HTS_ModelSet_use_gv(ms, i)) { 238 | sst->gv_switch = (HTS_Boolean *) HTS_calloc(sss->total_state, sizeof(HTS_Boolean)); 239 | for (j = 0; j < sss->total_state; j++) 240 | sst->gv_switch[j] = TRUE; 241 | } else { 242 | sst->gv_switch = NULL; 243 | } 244 | } 245 | 246 | /* determine state duration */ 247 | duration_mean = (double *) HTS_calloc(sss->total_state, sizeof(double)); 248 | duration_vari = (double *) HTS_calloc(sss->total_state, sizeof(double)); 249 | for (i = 0; i < HTS_Label_get_size(label); i++) 250 | HTS_ModelSet_get_duration(ms, HTS_Label_get_string(label, i), duration_iw, &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate]); 251 | if (phoneme_alignment_flag == TRUE) { 252 | /* use duration set by user */ 253 | next_time = 0; 254 | next_state = 0; 255 | state = 0; 256 | for (i = 0; i < HTS_Label_get_size(label); i++) { 257 | temp = HTS_Label_get_end_frame(label, i); 258 | if (temp >= 0) { 259 | next_time += (size_t) HTS_set_specified_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time); 260 | next_state = state + sss->nstate; 261 | } else if (i + 1 == HTS_Label_get_size(label)) { 262 | HTS_error(-1, "HTS_SStreamSet_create: The time of final 
label is not specified.\n"); 263 | HTS_set_default_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state); 264 | } 265 | state += sss->nstate; 266 | } 267 | } else { 268 | /* determine frame length */ 269 | if (speed != 1.0) { 270 | temp = 0.0; 271 | for (i = 0; i < sss->total_state; i++) { 272 | temp += duration_mean[i]; 273 | } 274 | frame_length = temp / speed; 275 | HTS_set_specified_duration(sss->duration, duration_mean, duration_vari, sss->total_state, frame_length); 276 | } else { 277 | HTS_set_default_duration(sss->duration, duration_mean, duration_vari, sss->total_state); 278 | } 279 | } 280 | HTS_free(duration_mean); 281 | HTS_free(duration_vari); 282 | 283 | /* get parameter */ 284 | for (i = 0, state = 0; i < HTS_Label_get_size(label); i++) { 285 | for (j = 2; j <= sss->nstate + 1; j++) { 286 | sss->total_frame += sss->duration[state]; 287 | for (k = 0; k < sss->nstream; k++) { 288 | sst = &sss->sstream[k]; 289 | if (sst->msd) 290 | HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], &sst->msd[state]); 291 | else 292 | HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], NULL); 293 | } 294 | state++; 295 | } 296 | } 297 | 298 | /* copy dynamic window */ 299 | for (i = 0; i < sss->nstream; i++) { 300 | sst = &sss->sstream[i]; 301 | sst->win_size = HTS_ModelSet_get_window_size(ms, i); 302 | sst->win_max_width = HTS_ModelSet_get_window_max_width(ms, i); 303 | sst->win_l_width = (int *) HTS_calloc(sst->win_size, sizeof(int)); 304 | sst->win_r_width = (int *) HTS_calloc(sst->win_size, sizeof(int)); 305 | sst->win_coefficient = (double **) HTS_calloc(sst->win_size, sizeof(double)); 306 | for (j = 0; j < sst->win_size; j++) { 307 | sst->win_l_width[j] = HTS_ModelSet_get_window_left_width(ms, i, j); 308 | 
sst->win_r_width[j] = HTS_ModelSet_get_window_right_width(ms, i, j); 309 | /* NOTE(review): the else branch allocates -2 * win_l_width[j] entries, which covers the indices written below only when win_r_width[j] is at most -win_l_width[j] - 1; confirm the model loader guarantees that */ if (sst->win_l_width[j] + sst->win_r_width[j] == 0) 310 | sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j] + 1, sizeof(double)); 311 | else 312 | sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j], sizeof(double)); 313 | /* offset the base pointer so the array is indexed by shift, from win_l_width[j] to win_r_width[j]; HTS_SStreamSet_clear adds the width back before freeing */ sst->win_coefficient[j] -= sst->win_l_width[j]; 314 | for (shift = sst->win_l_width[j]; shift <= sst->win_r_width[j]; shift++) 315 | sst->win_coefficient[j][shift] = HTS_ModelSet_get_window_coefficient(ms, i, j, shift); 316 | } 317 | } 318 | 319 | /* determine GV: mean and variance are requested with the string of the first label only (label index 0) */ 320 | for (i = 0; i < sss->nstream; i++) { 321 | sst = &sss->sstream[i]; 322 | if (HTS_ModelSet_use_gv(ms, i)) { 323 | sst->gv_mean = (double *) HTS_calloc(sst->vector_length, sizeof(double)); 324 | sst->gv_vari = (double *) HTS_calloc(sst->vector_length, sizeof(double)); 325 | HTS_ModelSet_get_gv(ms, i, HTS_Label_get_string(label, 0), (const double *const *) gv_iw, sst->gv_mean, sst->gv_vari); 326 | } else { 327 | sst->gv_mean = NULL; 328 | sst->gv_vari = NULL; 329 | } 330 | } 331 | /* turn the GV switch off for all states of every label whose GV flag is FALSE, in each stream that uses GV */ 332 | for (i = 0; i < HTS_Label_get_size(label); i++) 333 | if (HTS_ModelSet_get_gv_flag(ms, HTS_Label_get_string(label, i)) == FALSE) 334 | for (j = 0; j < sss->nstream; j++) 335 | if (HTS_ModelSet_use_gv(ms, j) == TRUE) 336 | for (k = 0; k < sss->nstate; k++) 337 | sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE; 338 | 339 | return TRUE; 340 | } 341 | 342 | /* HTS_SStreamSet_get_nstream: get number of streams */ 343 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss) 344 | { 345 | return sss->nstream; 346 | } 347 | 348 | /* HTS_SStreamSet_get_vector_length: get vector length of the given stream */ 349 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index) 350 | { 351 | return sss->sstream[stream_index].vector_length; 352 | } 353 | 354 | /* HTS_SStreamSet_is_msd: get MSD flag (TRUE when the msd array of the stream is non-NULL) */ 355 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index) 356 | 
{ 357 | return sss->sstream[stream_index].msd ? TRUE : FALSE; 358 | } 359 | 360 | /* HTS_SStreamSet_get_total_state: get total number of states */ 361 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss) 362 | { 363 | return sss->total_state; 364 | } 365 | 366 | /* HTS_SStreamSet_get_total_frame: get total number of frames */ 367 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss) 368 | { 369 | return sss->total_frame; 370 | } 371 | 372 | /* HTS_SStreamSet_get_msd: get MSD parameter of one state (msd is indexed without a NULL check) */ 373 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index) 374 | { 375 | return sss->sstream[stream_index].msd[state_index]; 376 | } 377 | 378 | /* HTS_SStreamSet_get_window_size: get dynamic window size */ 379 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index) 380 | { 381 | return sss->sstream[stream_index].win_size; 382 | } 383 | 384 | /* HTS_SStreamSet_get_window_left_width: get left width of dynamic window */ 385 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index) 386 | { 387 | return sss->sstream[stream_index].win_l_width[window_index]; 388 | } 389 | 390 | /* HTS_SStreamSet_get_window_right_width: get right width of dynamic window */ 391 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index) 392 | { 393 | return sss->sstream[stream_index].win_r_width[window_index]; 394 | } 395 | 396 | /* HTS_SStreamSet_get_window_coefficient: get coefficient of dynamic window (coefficient_index is a signed shift, the same index used when the coefficient was stored) */ 397 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index) 398 | { 399 | return sss->sstream[stream_index].win_coefficient[window_index][coefficient_index]; 400 | } 401 | 402 | /* HTS_SStreamSet_get_window_max_width: get max width of dynamic window */ 403 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index) 404 | { 405
| return sss->sstream[stream_index].win_max_width; 406 | } 407 | 408 | /* HTS_SStreamSet_use_gv: get GV flag (TRUE when gv_mean of the stream is non-NULL) */ 409 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index) 410 | { 411 | return sss->sstream[stream_index].gv_mean ? TRUE : FALSE; 412 | } 413 | 414 | /* HTS_SStreamSet_get_duration: get state duration */ 415 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index) 416 | { 417 | return sss->duration[state_index]; 418 | } 419 | 420 | /* HTS_SStreamSet_get_mean: get mean parameter */ 421 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index) 422 | { 423 | return sss->sstream[stream_index].mean[state_index][vector_index]; 424 | } 425 | 426 | /* HTS_SStreamSet_set_mean: set mean parameter */ 427 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f) 428 | { 429 | sss->sstream[stream_index].mean[state_index][vector_index] = f; 430 | } 431 | 432 | /* HTS_SStreamSet_get_vari: get variance parameter */ 433 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index) 434 | { 435 | return sss->sstream[stream_index].vari[state_index][vector_index]; 436 | } 437 | 438 | /* HTS_SStreamSet_set_vari: set variance parameter */ 439 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f) 440 | { 441 | sss->sstream[stream_index].vari[state_index][vector_index] = f; 442 | } 443 | 444 | /* HTS_SStreamSet_get_gv_mean: get GV mean parameter (gv_mean is indexed without a NULL check) */ 445 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index) 446 | { 447 | return sss->sstream[stream_index].gv_mean[vector_index]; 448 | } 449 | 450 | /* HTS_SStreamSet_get_gv_vari: get GV variance parameter (gv_vari is indexed without a NULL check) */ 451 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t 
vector_index) 452 | { 453 | return sss->sstream[stream_index].gv_vari[vector_index]; 454 | } 455 | 456 | /* HTS_SStreamSet_set_gv_switch: set GV switch of one state (gv_switch is indexed without a NULL check) */ 457 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i) 458 | { 459 | sss->sstream[stream_index].gv_switch[state_index] = i; 460 | } 461 | 462 | /* HTS_SStreamSet_get_gv_switch: get GV switch of one state (gv_switch is indexed without a NULL check) */ 463 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index) 464 | { 465 | return sss->sstream[stream_index].gv_switch[state_index]; 466 | } 467 | 468 | /* HTS_SStreamSet_clear: free state stream set (per-stream buffers, the stream array and the duration array) and reset sss with HTS_SStreamSet_initialize */ 469 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss) 470 | { 471 | size_t i, j; 472 | HTS_SStream *sst; 473 | 474 | if (sss->sstream) { 475 | for (i = 0; i < sss->nstream; i++) { 476 | sst = &sss->sstream[i]; 477 | /* NOTE(review): mean, vari and the window arrays are dereferenced and freed without NULL checks, so this assumes every stream was fully set up by HTS_SStreamSet_create */ for (j = 0; j < sss->total_state; j++) { 478 | HTS_free(sst->mean[j]); 479 | HTS_free(sst->vari[j]); 480 | } 481 | if (sst->msd) 482 | HTS_free(sst->msd); 483 | HTS_free(sst->mean); 484 | HTS_free(sst->vari); 485 | for (j = 0; j < sst->win_size; j++) { 486 | /* the stored pointer is offset by -win_l_width[j]; add the width back to recover the allocated address before freeing */ sst->win_coefficient[j] += sst->win_l_width[j]; 487 | HTS_free(sst->win_coefficient[j]); 488 | } 489 | HTS_free(sst->win_coefficient); 490 | HTS_free(sst->win_l_width); 491 | HTS_free(sst->win_r_width); 492 | if (sst->gv_mean) 493 | HTS_free(sst->gv_mean); 494 | if (sst->gv_vari) 495 | HTS_free(sst->gv_vari); 496 | if (sst->gv_switch) 497 | HTS_free(sst->gv_switch); 498 | } 499 | HTS_free(sss->sstream); 500 | } 501 | if (sss->duration) 502 | HTS_free(sss->duration); 503 | 504 | HTS_SStreamSet_initialize(sss); 505 | } 506 | 507 | HTS_SSTREAM_C_END; 508 | 509 | #endif /* !HTS_SSTREAM_C */ 510 | -------------------------------------------------------------------------------- /src/lib/HTS_vocoder.c: -------------------------------------------------------------------------------- 1 | /* ----------------------------------------------------------------- 
*/ 2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */ 3 | /* developed by HTS Working Group */ 4 | /* http://hts-engine.sourceforge.net/ */ 5 | /* ----------------------------------------------------------------- */ 6 | /* */ 7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */ 8 | /* Department of Computer Science */ 9 | /* */ 10 | /* 2001-2008 Tokyo Institute of Technology */ 11 | /* Interdisciplinary Graduate School of */ 12 | /* Science and Engineering */ 13 | /* */ 14 | /* All rights reserved. */ 15 | /* */ 16 | /* Redistribution and use in source and binary forms, with or */ 17 | /* without modification, are permitted provided that the following */ 18 | /* conditions are met: */ 19 | /* */ 20 | /* - Redistributions of source code must retain the above copyright */ 21 | /* notice, this list of conditions and the following disclaimer. */ 22 | /* - Redistributions in binary form must reproduce the above */ 23 | /* copyright notice, this list of conditions and the following */ 24 | /* disclaimer in the documentation and/or other materials provided */ 25 | /* with the distribution. */ 26 | /* - Neither the name of the HTS working group nor the names of its */ 27 | /* contributors may be used to endorse or promote products derived */ 28 | /* from this software without specific prior written permission. */ 29 | /* */ 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */ 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */ 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ 34 | /* DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */ 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */ 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */ 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */ 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */ 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */ 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */ 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ 42 | /* POSSIBILITY OF SUCH DAMAGE. */ 43 | /* ----------------------------------------------------------------- */ 44 | 45 | #ifndef HTS_VOCODER_C 46 | #define HTS_VOCODER_C 47 | 48 | #ifdef __cplusplus 49 | #define HTS_VOCODER_C_START extern "C" { 50 | #define HTS_VOCODER_C_END } 51 | #else 52 | #define HTS_VOCODER_C_START 53 | #define HTS_VOCODER_C_END 54 | #endif /* __cplusplus */ 55 | 56 | HTS_VOCODER_C_START; 57 | 58 | #include <math.h> /* for sqrt(),log(),exp(),pow(),cos() */ 59 | 60 | /* hts_engine libraries */ 61 | #include "HTS_hidden.h" 62 | 63 | static const double HTS_pade[21] = { 64 | 1.00000000000, 65 | 1.00000000000, 66 | 0.00000000000, 67 | 1.00000000000, 68 | 0.00000000000, 69 | 0.00000000000, 70 | 1.00000000000, 71 | 0.00000000000, 72 | 0.00000000000, 73 | 0.00000000000, 74 | 1.00000000000, 75 | 0.49992730000, 76 | 0.10670050000, 77 | 0.01170221000, 78 | 0.00056562790, 79 | 1.00000000000, 80 | 0.49993910000, 81 | 0.11070980000, 82 | 0.01369984000, 83 | 0.00095648530, 84 | 0.00003041721 85 | }; 86 | 87 | /* HTS_movem: move memory */ 88 | static void HTS_movem(double *a, double *b, const int nitem) 89 | { 90 | long i = (long) nitem; 91 | 92 | if (a > b) 93 | while (i--) 94 | *b++ = *a++; 95 | else { 96 | a += i; 97 | b += i; 98 | while (i--) 99 | *--b = *--a; 100 | } 101 | } 102 | 103 | /* HTS_mlsafir: sub functions for MLSA filter */ 104 | static double HTS_mlsafir(const double x, const double *b, 
const int m, const double a, const double aa, double *d) 105 | { 106 | double y = 0.0; 107 | int i; 108 | 109 | d[0] = x; 110 | d[1] = aa * d[0] + a * d[1]; 111 | 112 | for (i = 2; i <= m; i++) 113 | d[i] += a * (d[i + 1] - d[i - 1]); 114 | 115 | for (i = 2; i <= m; i++) 116 | y += d[i] * b[i]; 117 | 118 | for (i = m + 1; i > 1; i--) 119 | d[i] = d[i - 1]; 120 | 121 | return (y); 122 | } 123 | 124 | /* HTS_mlsadf1: sub functions for MLSA filter */ 125 | static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade) 126 | { 127 | double v, out = 0.0, *pt; 128 | int i; 129 | 130 | pt = &d[pd + 1]; 131 | 132 | for (i = pd; i >= 1; i--) { 133 | d[i] = aa * pt[i - 1] + a * d[i]; 134 | pt[i] = d[i] * b[1]; 135 | v = pt[i] * ppade[i]; 136 | x += (1 & i) ? v : -v; 137 | out += v; 138 | } 139 | 140 | pt[0] = x; 141 | out += x; 142 | 143 | return (out); 144 | } 145 | 146 | /* HTS_mlsadf2: sub functions for MLSA filter */ 147 | static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade) 148 | { 149 | double v, out = 0.0, *pt; 150 | int i; 151 | 152 | pt = &d[pd * (m + 2)]; 153 | 154 | for (i = pd; i >= 1; i--) { 155 | pt[i] = HTS_mlsafir(pt[i - 1], b, m, a, aa, &d[(i - 1) * (m + 2)]); 156 | v = pt[i] * ppade[i]; 157 | 158 | x += (1 & i) ? 
v : -v; 159 | out += v; 160 | } 161 | 162 | pt[0] = x; 163 | out += x; 164 | 165 | return (out); 166 | } 167 | 168 | /* HTS_mlsadf: functions for MLSA filter */ 169 | static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d) 170 | { 171 | const double aa = 1 - a * a; 172 | const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]); 173 | 174 | x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade); 175 | x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade); 176 | 177 | return (x); 178 | } 179 | 180 | /* HTS_rnd: functions for random noise generation */ 181 | static double HTS_rnd(unsigned long *next) 182 | { 183 | double r; 184 | 185 | *next = *next * 1103515245L + 12345; 186 | r = (*next / 65536L) % 32768L; 187 | 188 | return (r / RANDMAX); 189 | } 190 | 191 | /* HTS_nrandom: functions for gaussian random noise generation */ 192 | static double HTS_nrandom(HTS_Vocoder * v) 193 | { 194 | if (v->sw == 0) { 195 | v->sw = 1; 196 | do { 197 | v->r1 = 2 * HTS_rnd(&v->next) - 1; 198 | v->r2 = 2 * HTS_rnd(&v->next) - 1; 199 | v->s = v->r1 * v->r1 + v->r2 * v->r2; 200 | } while (v->s > 1 || v->s == 0); 201 | v->s = sqrt(-2 * log(v->s) / v->s); 202 | return (v->r1 * v->s); 203 | } else { 204 | v->sw = 0; 205 | return (v->r2 * v->s); 206 | } 207 | } 208 | 209 | /* HTS_mceq: function for M-sequence random noise generation */ 210 | static int HTS_mseq(HTS_Vocoder * v) 211 | { 212 | int x0, x28; 213 | 214 | v->x >>= 1; 215 | if (v->x & B0) 216 | x0 = 1; 217 | else 218 | x0 = -1; 219 | if (v->x & B28) 220 | x28 = 1; 221 | else 222 | x28 = -1; 223 | if (x0 + x28) 224 | v->x &= B31_; 225 | else 226 | v->x |= B31; 227 | 228 | return (x0); 229 | } 230 | 231 | /* HTS_mc2b: transform mel-cepstrum to MLSA digital fillter coefficients */ 232 | static void HTS_mc2b(double *mc, double *b, int m, const double a) 233 | { 234 | if (mc != b) { 235 | if (a != 0.0) { 236 | b[m] = mc[m]; 237 | for (m--; m >= 0; m--) 238 | b[m] = mc[m] - a * b[m + 1]; 239 
| } else 240 | HTS_movem(mc, b, m + 1); 241 | } else if (a != 0.0) 242 | for (m--; m >= 0; m--) 243 | b[m] -= a * b[m + 1]; 244 | } 245 | 246 | /* HTS_b2bc: transform MLSA digital filter coefficients to mel-cepstrum */ 247 | static void HTS_b2mc(const double *b, double *mc, int m, const double a) 248 | { 249 | double d, o; 250 | 251 | d = mc[m] = b[m]; 252 | for (m--; m >= 0; m--) { 253 | o = b[m] + a * d; 254 | d = b[m]; 255 | mc[m] = o; 256 | } 257 | } 258 | 259 | /* HTS_freqt: frequency transformation */ 260 | static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a) 261 | { 262 | int i, j; 263 | const double b = 1 - a * a; 264 | double *g; 265 | 266 | if (m2 > v->freqt_size) { 267 | if (v->freqt_buff != NULL) 268 | HTS_free(v->freqt_buff); 269 | v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double)); 270 | v->freqt_size = m2; 271 | } 272 | g = v->freqt_buff + v->freqt_size + 1; 273 | 274 | for (i = 0; i < m2 + 1; i++) 275 | g[i] = 0.0; 276 | 277 | for (i = -m1; i <= 0; i++) { 278 | if (0 <= m2) 279 | g[0] = c1[-i] + a * (v->freqt_buff[0] = g[0]); 280 | if (1 <= m2) 281 | g[1] = b * v->freqt_buff[0] + a * (v->freqt_buff[1] = g[1]); 282 | for (j = 2; j <= m2; j++) 283 | g[j] = v->freqt_buff[j - 1] + a * ((v->freqt_buff[j] = g[j]) - g[j - 1]); 284 | } 285 | 286 | HTS_movem(g, c2, m2 + 1); 287 | } 288 | 289 | /* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */ 290 | static void HTS_c2ir(const double *c, const int nc, double *h, const int leng) 291 | { 292 | int n, k, upl; 293 | double d; 294 | 295 | h[0] = exp(c[0]); 296 | for (n = 1; n < leng; n++) { 297 | d = 0; 298 | upl = (n >= nc) ? 
nc - 1 : n; 299 | for (k = 1; k <= upl; k++) 300 | d += k * c[k] * h[n - k]; 301 | h[n] = d / n; 302 | } 303 | } 304 | 305 | /* HTS_b2en: calculate frame energy */ 306 | static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a) 307 | { 308 | int i; 309 | double en = 0.0; 310 | double *cep; 311 | double *ir; 312 | 313 | if (v->spectrum2en_size < m) { 314 | if (v->spectrum2en_buff != NULL) 315 | HTS_free(v->spectrum2en_buff); 316 | v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double)); 317 | v->spectrum2en_size = m; 318 | } 319 | cep = v->spectrum2en_buff + m + 1; 320 | ir = cep + IRLENG; 321 | 322 | HTS_b2mc(b, v->spectrum2en_buff, m, a); 323 | HTS_freqt(v, v->spectrum2en_buff, m, cep, IRLENG - 1, -a); 324 | HTS_c2ir(cep, IRLENG, ir, IRLENG); 325 | 326 | for (i = 0; i < IRLENG; i++) 327 | en += ir[i] * ir[i]; 328 | 329 | return (en); 330 | } 331 | 332 | /* HTS_ignorm: inverse gain normalization */ 333 | static void HTS_ignorm(double *c1, double *c2, int m, const double g) 334 | { 335 | double k; 336 | if (g != 0.0) { 337 | k = pow(c1[0], g); 338 | for (; m >= 1; m--) 339 | c2[m] = k * c1[m]; 340 | c2[0] = (k - 1.0) / g; 341 | } else { 342 | HTS_movem(&c1[1], &c2[1], m); 343 | c2[0] = log(c1[0]); 344 | } 345 | } 346 | 347 | /* HTS_gnorm: gain normalization */ 348 | static void HTS_gnorm(double *c1, double *c2, int m, const double g) 349 | { 350 | double k; 351 | if (g != 0.0) { 352 | k = 1.0 + g * c1[0]; 353 | for (; m >= 1; m--) 354 | c2[m] = c1[m] / k; 355 | c2[0] = pow(k, 1.0 / g); 356 | } else { 357 | HTS_movem(&c1[1], &c2[1], m); 358 | c2[0] = exp(c1[0]); 359 | } 360 | } 361 | 362 | /* HTS_lsp2lpc: transform LSP to LPC */ 363 | static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m) 364 | { 365 | int i, k, mh1, mh2, flag_odd; 366 | double xx, xf, xff; 367 | double *p, *q; 368 | double *a0, *a1, *a2, *b0, *b1, *b2; 369 | 370 | flag_odd = 0; 371 | if (m % 2 == 0) 372 | mh1 = mh2 = m / 2; 
373 | else { 374 | mh1 = (m + 1) / 2; 375 | mh2 = (m - 1) / 2; 376 | flag_odd = 1; 377 | } 378 | 379 | if (m > v->lsp2lpc_size) { 380 | if (v->lsp2lpc_buff != NULL) 381 | HTS_free(v->lsp2lpc_buff); 382 | v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double)); 383 | v->lsp2lpc_size = m; 384 | } 385 | p = v->lsp2lpc_buff + m; 386 | q = p + mh1; 387 | a0 = q + mh2; 388 | a1 = a0 + (mh1 + 1); 389 | a2 = a1 + (mh1 + 1); 390 | b0 = a2 + (mh1 + 1); 391 | b1 = b0 + (mh2 + 1); 392 | b2 = b1 + (mh2 + 1); 393 | 394 | HTS_movem(lsp, v->lsp2lpc_buff, m); 395 | 396 | for (i = 0; i < mh1 + 1; i++) 397 | a0[i] = 0.0; 398 | for (i = 0; i < mh1 + 1; i++) 399 | a1[i] = 0.0; 400 | for (i = 0; i < mh1 + 1; i++) 401 | a2[i] = 0.0; 402 | for (i = 0; i < mh2 + 1; i++) 403 | b0[i] = 0.0; 404 | for (i = 0; i < mh2 + 1; i++) 405 | b1[i] = 0.0; 406 | for (i = 0; i < mh2 + 1; i++) 407 | b2[i] = 0.0; 408 | 409 | /* lsp filter parameters */ 410 | for (i = k = 0; i < mh1; i++, k += 2) 411 | p[i] = -2.0 * cos(v->lsp2lpc_buff[k]); 412 | for (i = k = 0; i < mh2; i++, k += 2) 413 | q[i] = -2.0 * cos(v->lsp2lpc_buff[k + 1]); 414 | 415 | /* impulse response of analysis filter */ 416 | xx = 1.0; 417 | xf = xff = 0.0; 418 | 419 | for (k = 0; k <= m; k++) { 420 | if (flag_odd) { 421 | a0[0] = xx; 422 | b0[0] = xx - xff; 423 | xff = xf; 424 | xf = xx; 425 | } else { 426 | a0[0] = xx + xf; 427 | b0[0] = xx - xf; 428 | xf = xx; 429 | } 430 | 431 | for (i = 0; i < mh1; i++) { 432 | a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i]; 433 | a2[i] = a1[i]; 434 | a1[i] = a0[i]; 435 | } 436 | 437 | for (i = 0; i < mh2; i++) { 438 | b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i]; 439 | b2[i] = b1[i]; 440 | b1[i] = b0[i]; 441 | } 442 | 443 | if (k != 0) 444 | a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]); 445 | xx = 0.0; 446 | } 447 | 448 | for (i = m - 1; i >= 0; i--) 449 | a[i + 1] = -a[i]; 450 | a[0] = 1.0; 451 | } 452 | 453 | /* HTS_gc2gc: generalized cepstral transformation */ 454 | static void HTS_gc2gc(HTS_Vocoder * v, 
double *c1, const int m1, const double g1, double *c2, const int m2, const double g2) 455 | { 456 | int i, min, k, mk; 457 | double ss1, ss2, cc; 458 | 459 | if (m1 > v->gc2gc_size) { 460 | if (v->gc2gc_buff != NULL) 461 | HTS_free(v->gc2gc_buff); 462 | v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double)); 463 | v->gc2gc_size = m1; 464 | } 465 | 466 | HTS_movem(c1, v->gc2gc_buff, m1 + 1); 467 | 468 | c2[0] = v->gc2gc_buff[0]; 469 | for (i = 1; i <= m2; i++) { 470 | ss1 = ss2 = 0.0; 471 | min = m1 < i ? m1 : i - 1; 472 | for (k = 1; k <= min; k++) { 473 | mk = i - k; 474 | cc = v->gc2gc_buff[k] * c2[mk]; 475 | ss2 += k * cc; 476 | ss1 += mk * cc; 477 | } 478 | 479 | if (i <= m1) 480 | c2[i] = v->gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i; 481 | else 482 | c2[i] = (g2 * ss2 - g1 * ss1) / i; 483 | } 484 | } 485 | 486 | /* HTS_mgc2mgc: frequency and generalized cepstral transformation */ 487 | static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2) 488 | { 489 | double a; 490 | 491 | if (a1 == a2) { 492 | HTS_gnorm(c1, c1, m1, g1); 493 | HTS_gc2gc(v, c1, m1, g1, c2, m2, g2); 494 | HTS_ignorm(c2, c2, m2, g2); 495 | } else { 496 | a = (a2 - a1) / (1 - a1 * a2); 497 | HTS_freqt(v, c1, m1, c2, m2, a); 498 | HTS_gnorm(c2, c2, m2, g1); 499 | HTS_gc2gc(v, c2, m2, g1, c2, m2, g2); 500 | HTS_ignorm(c2, c2, m2, g2); 501 | } 502 | } 503 | 504 | /* HTS_lsp2mgc: transform LSP to MGC */ 505 | static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha) 506 | { 507 | int i; 508 | /* lsp2lpc */ 509 | HTS_lsp2lpc(v, lsp + 1, mgc, m); 510 | if (v->use_log_gain) 511 | mgc[0] = exp(lsp[0]); 512 | else 513 | mgc[0] = lsp[0]; 514 | 515 | /* mgc2mgc */ 516 | if (NORMFLG1) 517 | HTS_ignorm(mgc, mgc, m, v->gamma); 518 | else if (MULGFLG1) 519 | mgc[0] = (1.0 - mgc[0]) * ((double) v->stage); 520 | if (MULGFLG1) 521 | for (i = m; i >= 1; i--) 522 
| mgc[i] *= -((double) v->stage); 523 | HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma); 524 | if (NORMFLG2) 525 | HTS_gnorm(mgc, mgc, m, v->gamma); 526 | else if (MULGFLG2) 527 | mgc[0] = mgc[0] * v->gamma + 1.0; 528 | if (MULGFLG2) 529 | for (i = m; i >= 1; i--) 530 | mgc[i] *= v->gamma; 531 | } 532 | 533 | /* HTS_mglsadff: sub functions for MGLSA filter */ 534 | static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d) 535 | { 536 | int i; 537 | 538 | double y; 539 | y = d[0] * b[1]; 540 | for (i = 1; i < m; i++) { 541 | d[i] += a * (d[i + 1] - d[i - 1]); 542 | y += d[i] * b[i + 1]; 543 | } 544 | x -= y; 545 | 546 | for (i = m; i > 0; i--) 547 | d[i] = d[i - 1]; 548 | d[0] = a * d[0] + (1 - a * a) * x; 549 | return x; 550 | } 551 | 552 | /* HTS_mglsadf: sub functions for MGLSA filter */ 553 | static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d) 554 | { 555 | int i; 556 | 557 | for (i = 0; i < n; i++) 558 | x = HTS_mglsadff(x, b, m, a, &d[i * (m + 1)]); 559 | 560 | return x; 561 | } 562 | 563 | /* THS_check_lsp_stability: check LSP stability */ 564 | static void HTS_check_lsp_stability(double *lsp, size_t m) 565 | { 566 | size_t i, j; 567 | double tmp; 568 | double min = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1); 569 | HTS_Boolean find; 570 | 571 | for (i = 0; i < CHECK_LSP_STABILITY_NUM; i++) { 572 | find = FALSE; 573 | 574 | for (j = 1; j < m; j++) { 575 | tmp = lsp[j + 1] - lsp[j]; 576 | if (tmp < min) { 577 | lsp[j] -= 0.5 * (min - tmp); 578 | lsp[j + 1] += 0.5 * (min - tmp); 579 | find = TRUE; 580 | } 581 | } 582 | 583 | if (lsp[1] < min) { 584 | lsp[1] = min; 585 | find = TRUE; 586 | } 587 | if (lsp[m] > PI - min) { 588 | lsp[m] = PI - min; 589 | find = TRUE; 590 | } 591 | 592 | if (find == FALSE) 593 | break; 594 | } 595 | } 596 | 597 | /* HTS_lsp2en: calculate frame energy */ 598 | static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double 
alpha) 599 | { 600 | size_t i; 601 | double en = 0.0; 602 | double *buff; 603 | 604 | if (v->spectrum2en_size < m) { 605 | if (v->spectrum2en_buff != NULL) 606 | HTS_free(v->spectrum2en_buff); 607 | v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double)); 608 | v->spectrum2en_size = m; 609 | } 610 | buff = v->spectrum2en_buff + m + 1; 611 | 612 | /* lsp2lpc */ 613 | HTS_lsp2lpc(v, lsp + 1, v->spectrum2en_buff, m); 614 | if (v->use_log_gain) 615 | v->spectrum2en_buff[0] = exp(lsp[0]); 616 | else 617 | v->spectrum2en_buff[0] = lsp[0]; 618 | 619 | /* mgc2mgc */ 620 | if (NORMFLG1) 621 | HTS_ignorm(v->spectrum2en_buff, v->spectrum2en_buff, m, v->gamma); 622 | else if (MULGFLG1) 623 | v->spectrum2en_buff[0] = (1.0 - v->spectrum2en_buff[0]) * ((double) v->stage); 624 | if (MULGFLG1) 625 | for (i = 1; i <= m; i++) 626 | v->spectrum2en_buff[i] *= -((double) v->stage); 627 | HTS_mgc2mgc(v, v->spectrum2en_buff, m, alpha, v->gamma, buff, IRLENG - 1, 0.0, 1); 628 | 629 | for (i = 0; i < IRLENG; i++) 630 | en += buff[i] * buff[i]; 631 | return en; 632 | } 633 | 634 | /* HTS_white_noise: return white noise */ 635 | static double HTS_white_noise(HTS_Vocoder * v) 636 | { 637 | if (v->gauss) 638 | return (double) HTS_nrandom(v); 639 | else 640 | return (double) HTS_mseq(v); 641 | } 642 | 643 | /* HTS_Vocoder_initialize_excitation: initialize excitation */ 644 | static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf) 645 | { 646 | size_t i; 647 | 648 | v->pitch_of_curr_point = pitch; 649 | v->pitch_counter = pitch; 650 | v->pitch_inc_per_point = 0.0; 651 | if (nlpf > 0) { 652 | v->excite_buff_size = nlpf; 653 | v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double)); 654 | for (i = 0; i < v->excite_buff_size; i++) 655 | v->excite_ring_buff[i] = 0.0; 656 | v->excite_buff_index = 0; 657 | } else { 658 | v->excite_buff_size = 0; 659 | v->excite_ring_buff = NULL; 660 | v->excite_buff_index = 0; 661 | } 662 | 
} 663 | 664 | /* HTS_Vocoder_start_excitation: start excitation of each frame */ 665 | static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch) 666 | { 667 | if (v->pitch_of_curr_point != 0.0 && pitch != 0.0) { 668 | v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd; 669 | } else { 670 | v->pitch_inc_per_point = 0.0; 671 | v->pitch_of_curr_point = pitch; 672 | v->pitch_counter = pitch; 673 | } 674 | } 675 | 676 | /* HTS_Vocoder_excite_unvoiced_frame: ping noise to ring buffer */ 677 | static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise) 678 | { 679 | size_t center = (v->excite_buff_size - 1) / 2; 680 | v->excite_ring_buff[(v->excite_buff_index + center) % v->excite_buff_size] += noise; 681 | } 682 | 683 | /* HTS_Vocoder_excite_vooiced_frame: ping noise and pulse to ring buffer */ 684 | static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf) 685 | { 686 | size_t i; 687 | size_t center = (v->excite_buff_size - 1) / 2; 688 | 689 | if (noise != 0.0) { 690 | for (i = 0; i < v->excite_buff_size; i++) { 691 | if (i == center) 692 | v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (1.0 - lpf[i]); 693 | else 694 | v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (0.0 - lpf[i]); 695 | } 696 | } 697 | if (pulse != 0.0) { 698 | for (i = 0; i < v->excite_buff_size; i++) 699 | v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += pulse * lpf[i]; 700 | } 701 | } 702 | 703 | /* HTS_Vocoder_get_excitation: get excitation of each sample */ 704 | static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf) 705 | { 706 | double x; 707 | double noise, pulse = 0.0; 708 | 709 | if (v->excite_buff_size > 0) { 710 | noise = HTS_white_noise(v); 711 | pulse = 0.0; 712 | if (v->pitch_of_curr_point == 0.0) { 713 | HTS_Vocoder_excite_unvoiced_frame(v, noise); 714 | } else { 715 | 
v->pitch_counter += 1.0; 716 | if (v->pitch_counter >= v->pitch_of_curr_point) { 717 | pulse = sqrt(v->pitch_of_curr_point); 718 | v->pitch_counter -= v->pitch_of_curr_point; 719 | } 720 | HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf); 721 | v->pitch_of_curr_point += v->pitch_inc_per_point; 722 | } 723 | x = v->excite_ring_buff[v->excite_buff_index]; 724 | v->excite_ring_buff[v->excite_buff_index] = 0.0; 725 | v->excite_buff_index++; 726 | if (v->excite_buff_index >= v->excite_buff_size) 727 | v->excite_buff_index = 0; 728 | } else { 729 | if (v->pitch_of_curr_point == 0.0) { 730 | x = HTS_white_noise(v); 731 | } else { 732 | v->pitch_counter += 1.0; 733 | if (v->pitch_counter >= v->pitch_of_curr_point) { 734 | x = sqrt(v->pitch_of_curr_point); 735 | v->pitch_counter -= v->pitch_of_curr_point; 736 | } else { 737 | x = 0.0; 738 | } 739 | v->pitch_of_curr_point += v->pitch_inc_per_point; 740 | } 741 | } 742 | 743 | return x; 744 | } 745 | 746 | /* HTS_Vocoder_end_excitation: end excitation of each frame */ 747 | static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch) 748 | { 749 | v->pitch_of_curr_point = pitch; 750 | } 751 | 752 | /* HTS_Vocoder_postfilter_mcp: postfilter for MCP */ 753 | static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta) 754 | { 755 | double e1, e2; 756 | int k; 757 | 758 | if (beta > 0.0 && m > 1) { 759 | if (v->postfilter_size < m) { 760 | if (v->postfilter_buff != NULL) 761 | HTS_free(v->postfilter_buff); 762 | v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double)); 763 | v->postfilter_size = m; 764 | } 765 | HTS_mc2b(mcp, v->postfilter_buff, m, alpha); 766 | e1 = HTS_b2en(v, v->postfilter_buff, m, alpha); 767 | 768 | v->postfilter_buff[1] -= beta * alpha * v->postfilter_buff[2]; 769 | for (k = 2; k <= m; k++) 770 | v->postfilter_buff[k] *= (1.0 + beta); 771 | 772 | e2 = HTS_b2en(v, v->postfilter_buff, m, alpha); 773 | v->postfilter_buff[0] += log(e1 / e2) 
/ 2; 774 | HTS_b2mc(v->postfilter_buff, mcp, m, alpha); 775 | } 776 | } 777 | 778 | /* HTS_Vocoder_postfilter_lsp: postfilter for LSP */ 779 | static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta) 780 | { 781 | double e1, e2; 782 | size_t i; 783 | double d1, d2; 784 | 785 | if (beta > 0.0 && m > 1) { 786 | if (v->postfilter_size < m) { 787 | if (v->postfilter_buff != NULL) 788 | HTS_free(v->postfilter_buff); 789 | v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double)); 790 | v->postfilter_size = m; 791 | } 792 | 793 | e1 = HTS_lsp2en(v, lsp, m, alpha); 794 | 795 | /* postfiltering */ 796 | for (i = 0; i <= m; i++) { 797 | if (i > 1 && i < m) { 798 | d1 = beta * (lsp[i + 1] - lsp[i]); 799 | d2 = beta * (lsp[i] - lsp[i - 1]); 800 | v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / ((d2 * d2) + (d1 * d1)); 801 | } else { 802 | v->postfilter_buff[i] = lsp[i]; 803 | } 804 | } 805 | HTS_movem(v->postfilter_buff, lsp, m + 1); 806 | 807 | e2 = HTS_lsp2en(v, lsp, m, alpha); 808 | 809 | if (e1 != e2) { 810 | if (v->use_log_gain) 811 | lsp[0] += 0.5 * log(e1 / e2); 812 | else 813 | lsp[0] *= sqrt(e1 / e2); 814 | } 815 | } 816 | } 817 | 818 | /* HTS_Vocoder_initialize: initialize vocoder */ 819 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod) 820 | { 821 | /* set parameter */ 822 | v->is_first = TRUE; 823 | v->stage = stage; 824 | if (stage != 0) 825 | v->gamma = -1.0 / v->stage; 826 | else 827 | v->gamma = 0.0; 828 | v->use_log_gain = use_log_gain; 829 | v->fprd = fperiod; 830 | v->next = SEED; 831 | v->gauss = GAUSS; 832 | v->rate = rate; 833 | v->pitch_of_curr_point = 0.0; 834 | v->pitch_counter = 0.0; 835 | v->pitch_inc_per_point = 0.0; 836 | v->excite_ring_buff = NULL; 837 | v->excite_buff_size = 0; 838 | v->excite_buff_index = 0; 839 | v->sw = 0; 840 | v->x = 0x55555555; 841 | /* init 
buffer */ 842 | v->freqt_buff = NULL; 843 | v->freqt_size = 0; 844 | v->gc2gc_buff = NULL; 845 | v->gc2gc_size = 0; 846 | v->lsp2lpc_buff = NULL; 847 | v->lsp2lpc_size = 0; 848 | v->postfilter_buff = NULL; 849 | v->postfilter_size = 0; 850 | v->spectrum2en_buff = NULL; 851 | v->spectrum2en_size = 0; 852 | if (v->stage == 0) { /* for MCP */ 853 | v->c = (double *) HTS_calloc(m * (3 + PADEORDER) + 5 * PADEORDER + 6, sizeof(double)); 854 | v->cc = v->c + m + 1; 855 | v->cinc = v->cc + m + 1; 856 | v->d1 = v->cinc + m + 1; 857 | } else { /* for LSP */ 858 | v->c = (double *) HTS_calloc((m + 1) * (v->stage + 3), sizeof(double)); 859 | v->cc = v->c + m + 1; 860 | v->cinc = v->cc + m + 1; 861 | v->d1 = v->cinc + m + 1; 862 | } 863 | } 864 | 865 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filster based waveform synthesis */ 866 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio) 867 | { 868 | double x; 869 | int i, j; 870 | short xs; 871 | int rawidx = 0; 872 | double p; 873 | 874 | /* lf0 -> pitch */ 875 | if (lf0 == LZERO) 876 | p = 0.0; 877 | else if (lf0 <= MIN_LF0) 878 | p = v->rate / MIN_F0; 879 | else if (lf0 >= MAX_LF0) 880 | p = v->rate / MAX_F0; 881 | else 882 | p = v->rate / exp(lf0); 883 | 884 | /* first time */ 885 | if (v->is_first == TRUE) { 886 | HTS_Vocoder_initialize_excitation(v, p, nlpf); 887 | if (v->stage == 0) { /* for MCP */ 888 | HTS_mc2b(spectrum, v->c, m, alpha); 889 | } else { /* for LSP */ 890 | HTS_movem(spectrum, v->c, m + 1); 891 | HTS_lsp2mgc(v, v->c, v->c, m, alpha); 892 | HTS_mc2b(v->c, v->c, m, alpha); 893 | HTS_gnorm(v->c, v->c, m, v->gamma); 894 | for (i = 1; i <= m; i++) 895 | v->c[i] *= v->gamma; 896 | } 897 | v->is_first = FALSE; 898 | } 899 | 900 | HTS_Vocoder_start_excitation(v, p); 901 | if (v->stage == 0) { /* for MCP */ 902 | HTS_Vocoder_postfilter_mcp(v, spectrum, m, 
alpha, beta); 903 | HTS_mc2b(spectrum, v->cc, m, alpha); 904 | for (i = 0; i <= m; i++) 905 | v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd; 906 | } else { /* for LSP */ 907 | HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta); 908 | HTS_check_lsp_stability(spectrum, m); 909 | HTS_lsp2mgc(v, spectrum, v->cc, m, alpha); 910 | HTS_mc2b(v->cc, v->cc, m, alpha); 911 | HTS_gnorm(v->cc, v->cc, m, v->gamma); 912 | for (i = 1; i <= m; i++) 913 | v->cc[i] *= v->gamma; 914 | for (i = 0; i <= m; i++) 915 | v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd; 916 | } 917 | 918 | for (j = 0; j < v->fprd; j++) { 919 | x = HTS_Vocoder_get_excitation(v, lpf); 920 | if (v->stage == 0) { /* for MCP */ 921 | if (x != 0.0) 922 | x *= exp(v->c[0]); 923 | x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1); 924 | } else { /* for LSP */ 925 | if (!NGAIN) 926 | x *= v->c[0]; 927 | x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1); 928 | } 929 | x *= volume; 930 | 931 | /* output */ 932 | if (rawdata) 933 | rawdata[rawidx++] = x; 934 | if (audio) { 935 | if (x > 32767.0) 936 | xs = 32767; 937 | else if (x < -32768.0) 938 | xs = -32768; 939 | else 940 | xs = (short) x; 941 | HTS_Audio_write(audio, xs); 942 | } 943 | 944 | for (i = 0; i <= m; i++) 945 | v->c[i] += v->cinc[i]; 946 | } 947 | 948 | HTS_Vocoder_end_excitation(v, p); 949 | HTS_movem(v->cc, v->c, m + 1); 950 | } 951 | 952 | /* HTS_Vocoder_clear: clear vocoder */ 953 | void HTS_Vocoder_clear(HTS_Vocoder * v) 954 | { 955 | if (v != NULL) { 956 | /* free buffer */ 957 | if (v->freqt_buff != NULL) { 958 | HTS_free(v->freqt_buff); 959 | v->freqt_buff = NULL; 960 | } 961 | v->freqt_size = 0; 962 | if (v->gc2gc_buff != NULL) { 963 | HTS_free(v->gc2gc_buff); 964 | v->gc2gc_buff = NULL; 965 | } 966 | v->gc2gc_size = 0; 967 | if (v->lsp2lpc_buff != NULL) { 968 | HTS_free(v->lsp2lpc_buff); 969 | v->lsp2lpc_buff = NULL; 970 | } 971 | v->lsp2lpc_size = 0; 972 | if (v->postfilter_buff != NULL) { 973 | HTS_free(v->postfilter_buff); 974 | 
v->postfilter_buff = NULL; 975 | } 976 | v->postfilter_size = 0; 977 | if (v->spectrum2en_buff != NULL) { 978 | HTS_free(v->spectrum2en_buff); 979 | v->spectrum2en_buff = NULL; 980 | } 981 | v->spectrum2en_size = 0; 982 | if (v->c != NULL) { 983 | HTS_free(v->c); 984 | v->c = NULL; 985 | } 986 | v->excite_buff_size = 0; 987 | v->excite_buff_index = 0; 988 | if (v->excite_ring_buff != NULL) { 989 | HTS_free(v->excite_ring_buff); 990 | v->excite_ring_buff = NULL; 991 | } 992 | } 993 | } 994 | 995 | HTS_VOCODER_C_END; 996 | 997 | #endif /* !HTS_VOCODER_C */ 998 | -------------------------------------------------------------------------------- /src/lib/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | EXTRA_DIST = Makefile.mak 3 | 4 | AM_CPPFLAGS = -I @top_srcdir@/include 5 | 6 | lib_LIBRARIES = libHTSEngine.a 7 | 8 | libHTSEngine_a_SOURCES = HTS_audio.c HTS_engine.c HTS_hidden.h HTS_misc.c \ 9 | HTS_pstream.c HTS_sstream.c HTS_model.c HTS_vocoder.c \ 10 | HTS_gstream.c HTS_label.c 11 | 12 | DISTCLEANFILES = *.log *.out *~ 13 | 14 | MAINTAINERCLEANFILES = Makefile.in 15 | -------------------------------------------------------------------------------- /src/lib/Makefile.mak: -------------------------------------------------------------------------------- 1 | 2 | CC = cl 3 | 4 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include 5 | LFLAGS = /LTCG 6 | 7 | CORES = HTS_audio.obj HTS_engine.obj HTS_gstream.obj HTS_label.obj HTS_misc.obj HTS_model.obj HTS_pstream.obj HTS_sstream.obj HTS_vocoder.obj 8 | 9 | all: hts_engine_API.lib 10 | 11 | hts_engine_API.lib: $(CORES) 12 | lib $(LFLAGS) /OUT:$@ $(CORES) 13 | 14 | .c.obj: 15 | $(CC) $(CFLAGS) /c $< 16 | 17 | clean: 18 | del *.lib 19 | del *.obj 20 | --------------------------------------------------------------------------------