├── .gitignore ├── .gitmodules ├── Dockerfile ├── LICENSE ├── Makefile ├── example └── main.go ├── go.mod ├── gopiper.cpp ├── gopiper.h └── piper.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.onnx 4 | *.json 5 | *.tar.gz -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "piper"] 2 | path = piper 3 | url = https://github.com/rhasspy/piper 4 | [submodule "piper-phonemize"] 5 | path = piper-phonemize 6 | url = https://github.com/rhasspy/piper-phonemize 7 | [submodule "espeak"] 8 | path = espeak 9 | url = https://github.com/rhasspy/espeak-ng 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.20-bullseye as build-amd64 2 | 3 | FROM quay.io/pypa/manylinux_2_28_aarch64 as build-arm64 4 | 5 | FROM debian:bullseye as build-armv7 6 | 7 | RUN apt-get update && \ 8 | apt-get install --yes --no-install-recommends \ 9 | build-essential cmake ca-certificates curl pkg-config 10 | 11 | # ----------------------------------------------------------------------------- 12 | 13 | ARG TARGETARCH 14 | ARG TARGETVARIANT 15 | FROM build-${TARGETARCH}${TARGETVARIANT} as build 16 | ARG TARGETARCH 17 | ARG TARGETVARIANT 18 | 19 | ENV LANG C.UTF-8 20 | ENV DEBIAN_FRONTEND=noninteractive 21 | 22 | WORKDIR /build 23 | RUN apt-get update 24 | RUN apt-get install -y cmake wget 25 | ARG SPDLOG_VERSION="1.11.0" 26 | RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \ 27 | tar -xzvf - && \ 28 | mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \ 29 | cd "spdlog-${SPDLOG_VERSION}/build" && \ 30 | cmake .. && \ 31 | make -j8 && \ 32 | cmake --install . --prefix /usr 33 | 34 | RUN mkdir -p "lib/Linux-$(uname -m)" 35 | # Use pre-compiled Piper phonemization library (includes onnxruntime) 36 | ARG PIPER_PHONEMIZE_VERSION='1.0.0' 37 | RUN mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \ 38 | curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH}${TARGETVARIANT}.tar.gz" | \ 39 | tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - 40 | 41 | RUN wget https://go.dev/dl/go1.20.5.linux-amd64.tar.gz && rm -rf /usr/local/go && tar -C /usr/local -xzf go1.20.5.linux-amd64.tar.gz 42 | ENV PATH=$PATH:/usr/local/go/bin 43 | 44 | RUN cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /lib64/ 45 | RUN cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ 46 | RUN cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ 47 | 48 | 49 | # Build piper binary 50 | #COPY piper/Makefile ./ 51 | #COPY piper/src/cpp/ ./src/cpp/ 52 | #RUN make 53 | 54 | # Do a test run 55 | #RUN ./build/piper --help 56 | 57 | # Build .tar.gz to keep symlinks 58 | #WORKDIR /dist 59 | #RUN mkdir -p piper && \ 60 | # cp -dR /build/build/*.so* /build/build/espeak-ng-data /build/build/libtashkeel_model.ort /build/build/piper ./piper/ && \ 61 | # tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/ 62 | 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ettore Di Giacinto 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | INCLUDE_PATH := $(abspath ./) 2 | LIBRARY_PATH := $(abspath ./) 3 | 4 | 5 | BUILD_TYPE?= 6 | # keep standard at C11 and C++11 7 | CFLAGS = -I. -I./piper/src/cpp -I./piper/build/fi/include -I./piper/build/pi/include -I./piper/build/si/include -O3 -DNDEBUG -std=c11 -fPIC -I./piper-phonemize/pi/include 8 | CXXFLAGS = -I. -I./piper/src/cpp -I./piper/build/fi/include -I./piper/build/pi/include -I./piper/build/si/include -O3 -DNDEBUG -std=c++17 -fPIC -I./piper-phonemize/pi/include 9 | LDFLAGS = -L./piper-phonemize/pi/lib -L./espeak/ei/lib/ -L./piper/build/fi/lib -L./piper/build/pi/lib -L./piper/build/si/lib -lfmt -lspdlog -lucd 10 | 11 | # warnings 12 | CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function 13 | CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function 14 | # 15 | # Print build information 16 | # 17 | 18 | $(info I go-piper build info: ) 19 | piper.o: 20 | mkdir -p piper/build 21 | mkdir -p piper-phonemize/pi 22 | mkdir -p espeak/ei 23 | cd espeak/ei && cmake .. -DUSE_ASYNC:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON -DUSE_MBROLA:BOOL=OFF -DUSE_LIBPCAUDIO:BOOL=OFF -DUSE_KLATT:BOOL=OFF -DUSE_SPEECHPLAYER:BOOL=OFF -DBUILD_ESPEAK_NG_TESTS:BOOL=OFF -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DEXTRA_cmn:BOOL=ON -DEXTRA_ru:BOOL=ON -DCMAKE_C_FLAGS="-D_FILE_OFFSET_BITS=64" -DUSE_LIBSONIC:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=$(abspath ./)/espeak/ei -DCMAKE_BUILD_TYPE=Release && make install 24 | cd piper-phonemize/pi && cmake .. --debug-output -DCMAKE_INSTALL_PREFIX:PATH=$(abspath ./)/piper-phonemize/pi -DESPEAK_NG_DIR=$(abspath ./)/espeak/ei/ -DCMAKE_BUILD_TYPE=Release && make install 25 | if [ -d "$(abspath ./)/piper-phonemize/pi/lib64" ]; then cp -rfv piper-phonemize/pi/lib64/* piper-phonemize/pi/lib; fi 26 | cd piper/build && cmake .. -DPIPER_PHONEMIZE_DIR=$(abspath ./)/piper-phonemize/pi -DCMAKE_BUILD_TYPE=Release $(CMAKE_ARGS) && make 27 | cp piper/build/CMakeFiles/piper.dir/src/cpp/piper.cpp.o piper.o 28 | 29 | gopiper.o: piper.o 30 | $(CXX) $(CXXFLAGS) gopiper.cpp -o gopiper.o -c $(LDFLAGS) 31 | 32 | libpiper_binding.a: piper.o gopiper.o 33 | ar src libpiper_binding.a piper.o 34 | 35 | example/main: libpiper_binding.a 36 | CGO_CXXFLAGS="${CXXFLAGS}" CGO_LDFLAGS="${LDFLAGS}" LIBRARY_PATH=${LIBRARY_PATH} go build -buildvcs=false -x -o example/main ./example 37 | 38 | clean: 39 | rm -rf *.o 40 | rm -rf *.a 41 | rm -rf piper/build 42 | rm -rf piper-phonemize/pi 43 | rm -rf espeak/ei 44 | rm -rf example/main 45 | 46 | docker-run: 47 | docker build -t piper . && docker run -v $(abspath ./):/build/go -ti --rm piper 48 | -------------------------------------------------------------------------------- /example/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/mudler/go-piper" 7 | ) 8 | 9 | func main() { 10 | 11 | fmt.Println(piper.TextToWav("Ciao a tutti, mi chiamo riccardo", "it-riccardo_fasol-x-low.onnx", "/build/lib/Linux-x86_64/piper_phonemize/lib/espeak-ng-data/", "", "ciao.wav")) 12 | } 13 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mudler/go-piper 2 | 3 | go 1.20 4 | -------------------------------------------------------------------------------- /gopiper.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #ifdef _MSC_VER 16 | #define WIN32_LEAN_AND_MEAN 17 | #define NOMINMAX 18 | #include 19 | #endif 20 | 21 | #ifdef __APPLE__ 22 | #include 23 | #endif 24 | 25 | #include "piper.hpp" 26 | 27 | using namespace std; 28 | 29 | int _piper_tts(char *text, char *model, char *espeakData, char *tashkeelPath, char *dst, optional speakerId) { 30 | filesystem::path model_path; 31 | filesystem::path config_path; 32 | model_path = filesystem::path(std::string(model)); 33 | config_path = filesystem::path(std::string(model) + ".json"); 34 | 35 | piper::PiperConfig piperConfig; 36 | piper::Voice voice; 37 | 38 | loadVoice(piperConfig, model_path.string(), 39 | config_path.string(), voice, speakerId); 40 | 41 | if (voice.phonemizeConfig.phonemeType == piper::eSpeakPhonemes) { 42 | spdlog::debug("Voice uses eSpeak phonemes ({})", 43 | voice.phonemizeConfig.eSpeak.voice); 44 | piperConfig.eSpeakDataPath = espeakData; 45 | } else { 46 | // Not using eSpeak 47 | piperConfig.useESpeak = false; 48 | } 49 | 50 | // Enable libtashkeel for Arabic 51 | if (voice.phonemizeConfig.eSpeak.voice == "ar") { 52 | piperConfig.useTashkeel = true; 53 | piperConfig.tashkeelModelPath =tashkeelPath; 54 | } 55 | 56 | piper::initialize(piperConfig); 57 | 58 | // Scales 59 | // if (runConfig.noiseScale) { 60 | // voice.synthesisConfig.noiseScale = runConfig.noiseScale.value(); 61 | // } 62 | 63 | // if (runConfig.lengthScale) { 64 | // voice.synthesisConfig.lengthScale = runConfig.lengthScale.value(); 65 | // } 66 | 67 | // if (runConfig.noiseW) { 68 | // voice.synthesisConfig.noiseW = runConfig.noiseW.value(); 69 | // } 70 | 71 | // if (runConfig.sentenceSilenceSeconds) { 72 | // voice.synthesisConfig.sentenceSilenceSeconds = 73 | // runConfig.sentenceSilenceSeconds.value(); 74 | // } 75 | 76 | piper::SynthesisResult result; 77 | ofstream audioFile(dst, ios::binary); 78 | piper::textToWavFile(piperConfig, voice, text, audioFile, result); 79 | piper::terminate(piperConfig); 80 | 81 | return EXIT_SUCCESS; 82 | } 83 | 84 | int piper_tts(char *text, char *model, char *espeakData, char *tashkeelPath, char *dst) { 85 | optional speakerId; 86 | return _piper_tts(text, model, espeakData, tashkeelPath, dst, speakerId); 87 | } 88 | 89 | int piper_tts_speaker(char *text, char *model, char *espeakData, char *tashkeelPath, char *dst, int64_t speakerId) { 90 | return _piper_tts(text, model, espeakData, tashkeelPath, dst, speakerId); 91 | } 92 | -------------------------------------------------------------------------------- /gopiper.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | int piper_tts(char *text, char *model, char *espeakData, char *tashkeelPath, char *dst); 5 | int piper_tts_speaker(char *text, char *model, char *espeakData, char *tashkeelPath, char *dst, int64_t speakerId); 6 | #ifdef __cplusplus 7 | } 8 | #endif 9 | -------------------------------------------------------------------------------- /piper.go: -------------------------------------------------------------------------------- 1 | package piper 2 | 3 | // #cgo CXXFLAGS: -I${SRCDIR}/piper/src/cpp/ -I${SRCDIR}/piper-phonemize/pi/include -std=c++17 4 | // #cgo LDFLAGS: -L${SRCDIR}/espeak/ei/lib/ -L${SRCDIR}/piper-phonemize/pi/lib/ -lpiper_binding -lspdlog -lonnxruntime -lespeak-ng -lpiper_phonemize -lucd 5 | // #include 6 | // #include 7 | import "C" 8 | import ( 9 | "fmt" 10 | ) 11 | 12 | func TextToWav(text, model, espeek, tas, dst string) error { 13 | t := C.CString(text) 14 | m := C.CString(model) 15 | ee := C.CString(espeek) 16 | tt := C.CString(tas) 17 | d := C.CString(dst) 18 | 19 | ret := C.piper_tts(t, m, ee, tt, d) 20 | if ret != 0 { 21 | return fmt.Errorf("failed") 22 | } 23 | return nil 24 | } 25 | 26 | func TextToWavSpeaker(text, model, espeek, tas, dst string, speakerId int64) error { 27 | t := C.CString(text) 28 | m := C.CString(model) 29 | ee := C.CString(espeek) 30 | tt := C.CString(tas) 31 | d := C.CString(dst) 32 | sid := C.int64_t(speakerId) 33 | 34 | ret := C.piper_tts_speaker(t, m, ee, tt, d, sid) 35 | if ret != 0 { 36 | return fmt.Errorf("failed") 37 | } 38 | return nil 39 | } 40 | --------------------------------------------------------------------------------