├── .github ├── FUNDING.yml └── workflows │ └── gitlab-mirror.yml ├── .npmignore ├── .gitignore ├── deploy ├── .env ├── config │ └── Caddyfile ├── compose.tts-api.yaml └── compose.caddy.yaml ├── views ├── index.pug ├── settings.pug └── form.pug ├── package.json ├── LICENSE ├── .gitlab-ci.yml ├── js └── form.js ├── app ├── index.js └── cmd.js └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: pedroetb 2 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .* 2 | deploy 3 | Dockerfile 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log 3 | *.tgz 4 | -------------------------------------------------------------------------------- /deploy/.env: -------------------------------------------------------------------------------- 1 | PORT=3000 2 | CONTAINER_NAME=tts-api 3 | CADDY_PORT=2015 4 | TRAEFIK_DOMAIN=change.me 5 | 6 | TTS_API_NET_NAME=tts-api-net 7 | TTS_API_NET_DRIVER=overlay 8 | -------------------------------------------------------------------------------- /deploy/config/Caddyfile: -------------------------------------------------------------------------------- 1 | { 2 | admin off 3 | auto_https off 4 | log { 5 | format console { 6 | time_format iso8601 7 | } 8 | } 9 | } 10 | 11 | :{$CADDY_PORT} { 12 | reverse_proxy {$CONTAINER_NAME}:{$PORT} 13 | respond /health "OK" 200 14 | } 15 | -------------------------------------------------------------------------------- /deploy/compose.tts-api.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | tts-api: 3 | image: ${IMAGE_NAME:-pedroetb/tts-api}:${IMAGE_TAG:-latest} 4 | container_name: ${CONTAINER_NAME} 5 | 
environment: 6 | PORT: 7 | networks: 8 | tts-api-net: 9 | devices: 10 | - ${AUDIO_DEVICE:-/dev/snd} 11 | restart: always 12 | cpus: '${CPUS:-0.5}' 13 | mem_limit: ${MEM_LIMIT:-128M} 14 | mem_reservation: ${MEM_RESERVATION:-64M} 15 | 16 | networks: 17 | tts-api-net: 18 | name: ${TTS_API_NET_NAME} 19 | driver: ${TTS_API_NET_DRIVER} 20 | external: true 21 | -------------------------------------------------------------------------------- /views/index.pug: -------------------------------------------------------------------------------- 1 | doctype html 2 | 3 | include settings 4 | 5 | html 6 | head 7 | title TTS-API 8 | link(rel='stylesheet' href='/css/bootstrap/bootstrap.min.css') 9 | link(rel='stylesheet' href='/css/alertify/alertify.min.css') 10 | link(rel='stylesheet' href='/css/alertify/default.min.css') 11 | script(src='/js/alertify/alertify.min.js') 12 | script(src='/js/form.js') 13 | 14 | body(onload=`onVoiceChange(${JSON.stringify(voices[0])})`) 15 | div( 16 | style='padding-top: 15px; padding-bottom: 15px' 17 | ).container 18 | div.row 19 | div.col-12 20 | h1 TTS-API 21 | 22 | div.row 23 | div.col-12 24 | include form 25 | -------------------------------------------------------------------------------- /.github/workflows/gitlab-mirror.yml: -------------------------------------------------------------------------------- 1 | name: Mirror and run GitLab CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v1 10 | - name: Mirror + trigger CI 11 | uses: SvanBoxel/gitlab-mirror-and-ci-action@master 12 | with: 13 | args: "https://gitlab.com/pedroetb-projects/tts-api.git/" 14 | env: 15 | FOLLOW_TAGS: "true" 16 | FORCE_PUSH: "false" 17 | GITLAB_HOSTNAME: "gitlab.com" 18 | GITLAB_USERNAME: "pedroetb" 19 | GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }} 20 | GITLAB_PROJECT_ID: "16939539" 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | 
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tts-api", 3 | "version": "2.6.1", 4 | "description": "Text to speech REST API for multiple TTS engines", 5 | "keywords": [ 6 | "text-to-speech", 7 | "tts", 8 | "speech", 9 | "voice", 10 | "rest-api", 11 | "api" 12 | ], 13 | "homepage": "https://github.com/pedroetb/tts-api", 14 | "license": "MIT", 15 | "author": "Pedro Trujillo", 16 | "contributors": [ 17 | { 18 | "name": "Pedro Trujillo", 19 | "email": "pedroetb@gmail.com" 20 | } 21 | ], 22 | "main": "app", 23 | "repository": { 24 | "type": "git", 25 | "url": "https://github.com/pedroetb/tts-api.git" 26 | }, 27 | "scripts": { 28 | "start": "node app" 29 | }, 30 | "dependencies": { 31 | "alertifyjs": "^1.13.1", 32 | "body-parser": "^1.20.2", 33 | "bootstrap": "^4.6.2", 34 | "express": "^4.18.3", 35 | "pug": "^3.0.2" 36 | }, 37 | "files": [ 38 | "app", 39 | "views", 40 | "js" 41 | ], 42 | "engines": { 43 | "node": ">=18.19.1", 44 | "npm": ">=10.2.4" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Pedro Trujillo (pedroetb) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - project: 'pedroetb-projects/gitlab-ci-templates' 3 | ref: master 4 | file: '/packaging-docker/docker-build.yml' 5 | - project: 'pedroetb-projects/gitlab-ci-templates' 6 | ref: master 7 | file: '/deployment-service/custom-image.yml' 8 | - project: 'pedroetb-projects/gitlab-ci-templates' 9 | ref: master 10 | file: '/deployment-service/lulu/docker-deploy.yml' 11 | 12 | stages: 13 | - pre-package 14 | - package 15 | - post-package 16 | - deploy 17 | 18 | .deploy: 19 | variables: 20 | FORCE_DOCKER_COMPOSE: 1 21 | COMPOSE_FILE: compose.tts-api.yaml 22 | SERVICES_TO_CHECK: tts-api 23 | 24 | .deploy-lulu: 25 | environment: 26 | name: lulu/tts-api 27 | 28 | .deploy-caddy: 29 | variables: &deploy-caddy-variables 30 | FORCE_DOCKER_COMPOSE: 0 31 | COMPOSE_FILE: compose.caddy.yaml 32 | SERVICES_TO_CHECK: ${CI_PROJECT_NAME}_caddy 33 | 34 | .deploy-lulu-caddy: 35 | environment: &deploy-lulu-caddy-environment 36 | name: lulu/caddy 37 | 38 | deploy-branch-lulu-caddy: 39 | extends: deploy-branch-lulu 40 | variables: 41 | <<: *deploy-caddy-variables 42 | environment: *deploy-lulu-caddy-environment 43 | 44 | deploy-tag-lulu-caddy: 45 | extends: deploy-tag-lulu 46 | variables: 47 | <<: *deploy-caddy-variables 48 | environment: *deploy-lulu-caddy-environment 49 | 
-------------------------------------------------------------------------------- /deploy/compose.caddy.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | caddy: 3 | image: ${CADDY_IMAGE_NAME:-caddy}:${CADDY_IMAGE_TAG:-latest} 4 | environment: 5 | CADDY_PORT: 6 | CONTAINER_NAME: 7 | PORT: 8 | networks: 9 | tts-api-net: 10 | traefik-net: 11 | configs: 12 | - source: caddyfile-config 13 | target: /etc/caddy/Caddyfile 14 | healthcheck: 15 | test: wget --spider -q http://localhost:${CADDY_PORT}/health 16 | interval: ${HEALTHCHECK_INTERVAL:-30s} 17 | timeout: ${HEALTHCHECK_TIMEOUT:-15s} 18 | retries: ${HEALTHCHECK_RETRIES:-10} 19 | start_period: ${HEALTHCHECK_START_PERIOD:-30s} 20 | deploy: 21 | mode: replicated 22 | replicas: ${REPLICAS:-1} 23 | restart_policy: 24 | delay: ${RESTART_DELAY:-3s} 25 | update_config: 26 | delay: ${UPDATE_DELAY:-30s} 27 | labels: 28 | traefik.enable: ${TRAEFIK_ENABLE:-true} 29 | traefik.http.routers.tts-api.entrypoints: ${TRAEFIK_ENTRYPOINT:-https} 30 | traefik.http.routers.tts-api.rule: Host(`${TRAEFIK_SUBDOMAIN:-tts}.${TRAEFIK_DOMAIN}`) 31 | traefik.http.services.tts-api.loadbalancer.server.port: ${CADDY_PORT} 32 | resources: 33 | limits: 34 | cpus: '${RESOURCES_LIMITS_CPUS:-0.1}' 35 | memory: ${RESOURCES_LIMITS_MEMORY:-64M} 36 | reservations: 37 | cpus: '${RESOURCES_RESERVATIONS_CPUS:-0.001}' 38 | memory: ${RESOURCES_RESERVATIONS_MEMORY:-32M} 39 | 40 | networks: 41 | tts-api-net: 42 | name: ${TTS_API_NET_NAME} 43 | driver: ${TTS_API_NET_DRIVER} 44 | attachable: true 45 | 46 | traefik-net: 47 | name: ${TRAEFIK_NET_NAME:-traefik-net} 48 | driver: ${TRAEFIK_NET_DRIVER:-overlay} 49 | external: true 50 | 51 | configs: 52 | caddyfile-config: 53 | name: ${CADDYFILE_CONFIG_NAME:-tts-api-caddyfile} 54 | file: ./config/Caddyfile 55 | -------------------------------------------------------------------------------- /js/form.js: 
// Ids of the inputs currently disabled because they are incompatible with the
// selected voice. Undefined/null while nothing is disabled.
var disabledInputIds;

/**
 * Reacts to a voice selection: re-enables the inputs disabled for the previous
 * voice and disables the ones listed as incompatible with the new voice.
 *
 * @param {Object} voiceItem - Voice descriptor; its optional
 *   `incompatibleSettings` array holds the ids of the inputs to disable.
 */
function onVoiceChange(voiceItem) {

	if (disabledInputIds) {
		updateInputsDisabledState(disabledInputIds, false);
		disabledInputIds = null;
	}

	// Tolerate a missing descriptor (e.g. the onload call when no voice exists)
	var incompatibleSettings = voiceItem && voiceItem.incompatibleSettings;

	if (Array.isArray(incompatibleSettings)) {
		updateInputsDisabledState(incompatibleSettings, true);
		disabledInputIds = incompatibleSettings;
	}
}

/**
 * Sets or removes the `disabled` attribute on every element found by id.
 *
 * @param {string[]} inputIds - Element ids to update.
 * @param {boolean} disable - True to disable the elements, false to enable them.
 */
function updateInputsDisabledState(inputIds, disable) {

	for (var i = 0; i < inputIds.length; i++) {
		var input = document.getElementById(inputIds[i]);

		// An unknown id must not abort the loop and leave the rest untouched
		if (!input) {
			continue;
		}

		if (disable) {
			input.setAttribute('disabled', '');
		} else {
			input.removeAttribute('disabled');
		}
	}
}

/**
 * Click handler of the submit button. When the first form of the document is
 * valid, it cancels the native submission and sends the values as JSON instead.
 * When the form is missing or invalid it returns without cancelling the event.
 *
 * @param {Event} evt - Click event received from the button.
 */
function onSubmit(evt) {

	var form = document.forms[0];

	if (!form || !form.checkValidity()) {
		return;
	}

	evt.preventDefault();

	submitData(getInputValues(form));
}

/**
 * Collects the values of the named controls of a form, skipping the ones
 * currently disabled by the selected voice.
 *
 * @param {Object} form - Form whose `elements` collection is read; controls are
 *   looked up both by index and by name.
 * @returns {Object} Map from control name to its value (`checked` state for
 *   checkboxes, `value` otherwise).
 */
function getInputValues(form) {

	var inputs = form.elements,
		inputValues = {};

	for (var i = 0; i < inputs.length; i++) {
		var input = inputs[i],
			inputName = input.name;

		// Unnamed controls (the submit button) and names already collected
		// (every radio of a group shares one name) are skipped
		if (!inputName || Object.prototype.hasOwnProperty.call(inputValues, inputName)) {
			continue;
		}

		var namedInput = inputs[inputName],
			inputIsDisabled = !!disabledInputIds && disabledInputIds.indexOf(inputName) !== -1;

		if (!namedInput || inputIsDisabled) {
			continue;
		}

		// The lookup by name is used so a radio group yields its checked value
		if (input.type === 'checkbox') {
			inputValues[inputName] = namedInput.checked;
		} else {
			inputValues[inputName] = namedInput.value;
		}
	}

	return inputValues;
}

/**
 * Posts the collected values to the API as JSON and reports the outcome with
 * alertify notifications.
 *
 * @param {Object} data - Values to send as the request body.
 */
function submitData(data) {

	alertify.set('notifier', 'position', 'top-right');

	// Second argument 0 is passed as the notification wait time; the message is
	// dismissed explicitly once the request settles
	var startMessageHandler = alertify.message('Speaking...', 0);

	fetch('/', {
		method: 'post',
		headers: new Headers({
			'Content-Type': 'application/json'
		}),
		body: JSON.stringify(data)
	}).then(function(res) {

		startMessageHandler.dismiss();
		if (res.ok) {
			alertify.success('Speech done!');
		} else {
			alertify.error('Speech error!');
		}
	}).catch(function() {

		startMessageHandler.dismiss();
		alertify.error('Connection error!');
	});
}
68 | value: 0, 69 | min: 0, 70 | max: 10, 71 | step: 0.1 72 | } 73 | 74 | modifiers = { 75 | label: 'Modifiers' 76 | } 77 | 78 | slowReading = { 79 | id: 'slowReading', 80 | label: 'Slow reading' 81 | } 82 | 83 | reverse = { 84 | id: 'reverse', 85 | label: 'Reverse' 86 | } 87 | 88 | reverb = { 89 | id: 'reverb', 90 | label: 'Reverb' 91 | } 92 | 93 | robot = { 94 | id: 'robot', 95 | label: 'Robot' 96 | } 97 | 98 | voices = [{ 99 | value: 'google_speech', 100 | label: 'GoogleSpeech', 101 | incompatibleSettings: [ 102 | voiceCode.id, 103 | slowReading.id 104 | ] 105 | },{ 106 | value: 'google_speech_file', 107 | label: 'GoogleSpeech (file)', 108 | incompatibleSettings: [ 109 | voiceCode.id, 110 | speed.id, 111 | pitch.id, 112 | tempo.id, 113 | gain.id, 114 | delay.id, 115 | slowReading.id, 116 | reverse.id, 117 | reverb.id, 118 | robot.id 119 | ] 120 | },{ 121 | value: 'gtts', 122 | label: 'gTTS', 123 | incompatibleSettings: [ 124 | voiceCode.id 125 | ] 126 | },{ 127 | value: 'gtts_file', 128 | label: 'gTTS (file)', 129 | incompatibleSettings: [ 130 | voiceCode.id 131 | ] 132 | },{ 133 | value: 'festival', 134 | label: 'Festival', 135 | incompatibleSettings: [ 136 | voiceCode.id, 137 | speed.id, 138 | pitch.id, 139 | tempo.id, 140 | gain.id, 141 | delay.id, 142 | slowReading.id, 143 | reverse.id, 144 | reverb.id, 145 | robot.id 146 | ] 147 | },{ 148 | value: 'espeak', 149 | label: 'eSpeak', 150 | incompatibleSettings: [ 151 | slowReading.id 152 | ] 153 | },{ 154 | value: 'espeak_file', 155 | label: 'eSpeak (file)', 156 | incompatibleSettings: [ 157 | slowReading.id 158 | ] 159 | }] 160 | -------------------------------------------------------------------------------- /views/form.pug: -------------------------------------------------------------------------------- 1 | form 2 | div.form-group 3 | label #{voice.label} 4 | div.form-check 5 | each voiceItem, index in voices 6 | div.form-check-inline 7 | label.form-check-label 8 | input.form-check-input( 9 | type='radio' 10 
| name=voice.name 11 | value=voiceItem.value 12 | checked=(!index ? true : false) 13 | onchange=`onVoiceChange(${JSON.stringify(voiceItem)})` 14 | ) 15 | span #{voiceItem.label} 16 | 17 | div.form-group 18 | label(for=text.id) #{text.label} 19 | textarea( 20 | id=text.id 21 | type='text' 22 | name=text.id 23 | placeholder=text.label 24 | autofocus 25 | minlength=text.minLength 26 | maxlength=text.maxLength 27 | required 28 | ).form-control 29 | 30 | div.form-group 31 | label(for=language.id) #{language.label} 32 | input( 33 | id=language.id 34 | type='text' 35 | name=language.id 36 | placeholder=language.label 37 | value=language.value 38 | minlength=language.minLength 39 | maxlength=language.maxLength 40 | required 41 | ).form-control 42 | 43 | div.form-group 44 | label(for=voiceCode.id) #{voiceCode.label} 45 | input( 46 | id=voiceCode.id 47 | type='text' 48 | name=voiceCode.id 49 | placeholder=voiceCode.label 50 | value=voiceCode.value 51 | maxlength=voiceCode.maxLength 52 | ).form-control 53 | 54 | div.form-group 55 | label(for=speed.id) #{speed.label} 56 | input( 57 | id=speed.id 58 | type='number' 59 | name=speed.id 60 | placeholder=speed.label 61 | value=speed.value 62 | min=speed.min 63 | max=speed.max 64 | step=speed.step 65 | ).form-control 66 | 67 | div.form-group 68 | label(for=pitch.id) #{pitch.label} 69 | input( 70 | id=pitch.id 71 | type='number' 72 | name=pitch.id 73 | placeholder=pitch.label 74 | value=pitch.value 75 | min=pitch.min 76 | max=pitch.max 77 | step=pitch.step 78 | ).form-control 79 | 80 | div.form-group 81 | label(for=tempo.id) #{tempo.label} 82 | input( 83 | id=tempo.id 84 | type='number' 85 | name=tempo.id 86 | placeholder=tempo.label 87 | value=tempo.value 88 | min=tempo.min 89 | max=tempo.max 90 | step=tempo.step 91 | ).form-control 92 | 93 | div.form-group 94 | label(for=gain.id) #{gain.label} 95 | input( 96 | id=gain.id 97 | type='number' 98 | name=gain.id 99 | placeholder=gain.label 100 | value=gain.value 101 | min=gain.min 102 | 
var util = require('util'),
	childProcess = require('child_process'),
	express = require('express'),
	bodyParser = require('body-parser'),

	cmd = require('./cmd'),

	server = express(),
	port = process.env.PORT || 3000;

// Static assets are served from the installed npm packages and the project's
// `js` folder; the form is rendered on GET and speech requests arrive on POST.
server.set('view engine', 'pug')
	.use(bodyParser.json())

	.use('/css/bootstrap', express.static(__dirname + '/../node_modules/bootstrap/dist/css'))
	.use('/css/alertify', express.static(__dirname + '/../node_modules/alertifyjs/build/css'))
	.use('/css/alertify', express.static(__dirname + '/../node_modules/alertifyjs/build/css/themes'))
	.use('/js', express.static(__dirname + '/../js'))
	.use('/js/alertify', express.static(__dirname + '/../node_modules/alertifyjs/build'))

	.get('/', renderForm)
	.post('/', processData)

	.listen(port, function() {

		console.log('Listening at port', this.address().port);
	});

/**
 * Renders the built-in form (the `index` view).
 */
function renderForm(req, res) {

	res.render('index');
}

/**
 * Handles a speech request: resolves the command(s) for the requested voice and
 * runs them, either as a single process or as a chain of piped processes.
 * Requests whose voice cannot be resolved are answered with a 400 error.
 */
function processData(req, res) {

	var body = req.body || {},
		cmdWithArgs = cmd.getCmdWithArgs(body),
		httpArgs = {
			res: res,
			fields: body
		};

	// Without this guard an unknown voice would reach spawn() with no command
	if (!cmdWithArgs) {
		handleSpeechError(httpArgs, 'Unsupported voice: ' + body.voice, 400);
		return;
	}

	if (Array.isArray(cmdWithArgs)) {
		runSpeechProcessChain(cmdWithArgs, httpArgs);
	} else {
		runLastSpeechProcess(cmdWithArgs, httpArgs);
	}
}

/**
 * Spawns the final (or only) process of a request; its outcome decides the
 * HTTP response.
 *
 * @param {Object} cmdWithArgs - `{ cmd, args, file }` descriptor; `file` is optional.
 * @param {Object} httpArgs - `{ res, fields }` of the request being served.
 * @returns {ChildProcess} The spawned process.
 */
function runLastSpeechProcess(cmdWithArgs, httpArgs) {

	var speechProcess = runSpeechProcess(cmdWithArgs);

	speechProcess.on('error', onLastSpeechError.bind(null, httpArgs));
	speechProcess.on('close', onLastSpeechClose);
	speechProcess.on('exit', onLastSpeechExit.bind(null, cmdWithArgs, httpArgs));

	return speechProcess;
}

/**
 * Spawns a command (no shell involved) and logs whatever it writes to stderr.
 *
 * @param {Object} cmdWithArgs - `{ cmd, args }` descriptor.
 * @returns {ChildProcess} The spawned process.
 */
function runSpeechProcess(cmdWithArgs) {

	var newProcess = childProcess.spawn(cmdWithArgs.cmd, cmdWithArgs.args);

	newProcess.stderr.on('data', onSpeechStandardError);
	// A stream 'error' without listener is thrown; writing to a consumer that
	// already exited (EPIPE) must not take the whole server down
	newProcess.stdin.on('error', onSpeechStandardInputError);

	return newProcess;
}

/**
 * Logs a chunk written by a child process to its stderr.
 */
function onSpeechStandardError(buffer) {

	console.error('[stderr]:', buffer.toString('utf8'));
}

/**
 * Logs an error raised by the stdin stream of a child process.
 */
function onSpeechStandardInputError(err) {

	console.error('[stdin error]:', util.inspect(err));
}

/**
 * Spawns every command of a chain. The stdout of each process is forwarded to
 * the stdin of the next one; the last process decides the HTTP response.
 *
 * @param {Object[]} cmdWithArgs - Ordered command descriptors.
 * @param {Object} httpArgs - `{ res, fields }` of the request being served.
 */
function runSpeechProcessChain(cmdWithArgs, httpArgs) {

	var speechProcs = {},
		lastIndex = cmdWithArgs.length - 1;

	for (var i = 0; i < cmdWithArgs.length; i++) {
		if (i !== lastIndex) {
			// The next process is not spawned yet, so it is resolved lazily
			// from `speechProcs` when data actually arrives
			var getNextProcessCbk = getNextSpeechProcess.bind(speechProcs, i + 1);
			speechProcs[i] = runIntermediateSpeechProcess(cmdWithArgs[i], getNextProcessCbk);
		} else {
			speechProcs[i] = runLastSpeechProcess(cmdWithArgs[i], httpArgs);
		}
	}
}

/**
 * Spawns a non-final process of a chain and wires its output to the next one.
 *
 * @param {Object} cmdWithArgs - `{ cmd, args }` descriptor.
 * @param {Function} procArgs - Callback returning the next process of the chain.
 * @returns {ChildProcess} The spawned process.
 */
function runIntermediateSpeechProcess(cmdWithArgs, procArgs) {

	var speechProcess = runSpeechProcess(cmdWithArgs);

	speechProcess.stdout.on('data', onIntermediateSpeechStandardOutput.bind(null, procArgs));
	speechProcess.on('error', onIntermediateSpeechError);
	speechProcess.on('close', onIntermediateSpeechClose.bind(null, procArgs));

	return speechProcess;
}

/**
 * Returns the process stored at `nextIndex`; meant to be bound to the map of
 * processes of a chain (used as `this`).
 */
function getNextSpeechProcess(nextIndex) {

	return this[nextIndex];
}

/**
 * Forwards a chunk of output to the stdin of the next process, if still writable.
 */
function onIntermediateSpeechStandardOutput(getNextProc, data) {

	var nextSpeechProcess = getNextProc(),
		inputStream = nextSpeechProcess.stdin;

	if (inputStream.writable) {
		inputStream.write(data);
	}
}

/**
 * Logs a non-zero exit code of an intermediate process and closes the stdin of
 * the next one, so it knows no more input is coming.
 */
function onIntermediateSpeechClose(getNextProc, code) {

	var nextSpeechProcess = getNextProc(),
		inputStream = nextSpeechProcess.stdin;

	if (code) {
		console.error('[intermediate exit code]:', code);
	}

	inputStream.end();
}

/**
 * Logs an error emitted by an intermediate process.
 */
function onIntermediateSpeechError(err) {

	console.error('[intermediate error]:', util.inspect(err));
}

/**
 * Logs a non-zero exit code of the last process.
 */
function onLastSpeechClose(code) {

	if (code) {
		console.error('[exit code]:', code);
	}
}

/**
 * Answers the request once the last process exits: sends the generated file
 * (when the command declares one) or an empty response on success, and an
 * error response otherwise.
 *
 * @param {Object} cmdWithArgs - Descriptor of the process that exited.
 * @param {Object} httpArgs - `{ res, fields }` of the request being served.
 * @param {?number} code - Exit code; null when the process was ended by a signal.
 * @param {?string} signal - Signal that ended the process, if any.
 */
function onLastSpeechExit(cmdWithArgs, httpArgs, code, signal) {

	var res = httpArgs.res,
		filePath = cmdWithArgs.file;

	// Only a zero exit code is a success; a null code means killed by a signal
	if (code !== 0) {
		handleSpeechError(httpArgs, code !== null && code !== undefined ? code : signal);
		return;
	}

	// An 'error' event may already have produced the response
	if (res.headersSent) {
		return;
	}

	if (filePath) {
		res.download(filePath);
	} else {
		res.end();
	}
}

/**
 * Answers the request with an error when the last process emits 'error'.
 */
function onLastSpeechError(args, err) {

	handleSpeechError(args, err);
}

/**
 * Sends a plain-text error response (unless one was already sent) and logs the
 * error together with the request fields.
 *
 * @param {Object} args - `{ res, fields }` of the request being served.
 * @param {*} err - Error, exit code or signal describing the failure.
 * @param {number} [statusCode=500] - HTTP status code of the response.
 */
function handleSpeechError(args, err, statusCode) {

	var res = args.res,
		fields = args.fields,
		errorHeaderMessage = '----[error]----',
		dataHeaderMessage = '-----[data]-----',
		inspectedError = util.inspect(err),
		inspectedFields = util.inspect(fields);

	// Writing headers twice throws, so a second failure is only logged
	if (!res.headersSent) {
		res.writeHead(statusCode || 500, {
			'Content-Type': 'text/plain; charset=utf-8'
		});

		res.write(errorHeaderMessage + '\n');
		res.write(inspectedError + '\n');
		res.write(dataHeaderMessage + '\n');
		res.write(inspectedFields + '\n');

		res.end();
	}

	console.error(errorHeaderMessage);
	console.error(inspectedError);
	console.error(dataHeaderMessage);
	console.error(inspectedFields);
}
42 | 43 | Or you can just use [pedroetb/tts-api](https://hub.docker.com/r/pedroetb/tts-api) **Docker** image, which already has all dependencies configured. 44 | 45 | ## Setup using Docker 46 | 47 | The only requirement is to have **Docker** installed. Then, you can run: 48 | 49 | ```sh 50 | docker run --rm -d --name tts-api --device /dev/snd -p 3000:3000 pedroetb/tts-api 51 | ``` 52 | 53 | The API will be running and accessible at `http://localhost:3000`. 54 | 55 | Alternatively, you can deploy it in a **Docker Swarm** cluster using `docker compose` (already included in modern Docker versions) and `docker swarm` (create the Swarm cluster first): 56 | 57 | ```sh 58 | cd deploy 59 | 60 | # Deploy Caddy service 61 | env $(grep -v '^[#| ]' .env | xargs) \ 62 | TRAEFIK_DOMAIN=change.me \ 63 | docker stack deploy \ 64 | -c compose.caddy.yaml \ 65 | tts-api 66 | 67 | # Run TTS-API container 68 | docker compose \ 69 | -f compose.tts-api.yaml \ 70 | -p tts-api \ 71 | up -d 72 | ``` 73 | 74 | The service is prepared to be reverse-proxied with **Traefik**, and accessible at the `tts.${TRAEFIK_DOMAIN}` domain. How to run **Traefik** is not described here; check its [official site](https://traefik.io). 75 | 76 | The proxy needs a little help from **Caddy**, because Docker Swarm is not compatible with devices configuration (required to use sound capabilities) and Traefik cannot work with Docker containers and Docker Swarm services at the same time. This way, only the **Caddy** service is exposed using **Traefik**, and the `tts-api` container is only accessible through the reverse proxy provided by **Caddy** (the same way **Traefik** is reverse-proxying to **Caddy**). 77 | 78 | The Docker container and the Swarm service can run on different hosts, because they are able to communicate through a Docker overlay network. Run the `tts-api` Docker container on a host that has speakers, so you can listen to the speech. 79 | 80 | Don't forget to edit the `TRAEFIK_DOMAIN` environment variable before deploying. 
81 | 82 | ## Usage 83 | 84 | When running, the API will receive POST requests at `http://localhost:3000`. 85 | You can use your favourite REST client to send a request, or use the built-in form. 86 | 87 | Both modes (playing or downloading audio) are selected through the `voice` value (for example, `gtts` plays the audio and `gtts_file` returns it as a file); choose one according to your needs. 88 | 89 | ### Built-in form 90 | 91 | Go to `http://localhost:3000` with your browser, fill in the form and submit it. That's all. 92 | 93 | ### Send POST request 94 | 95 | You can send a POST request to `http://localhost:3000` following this scheme: 96 | 97 | * **Headers** 98 | * **Content-Type**: `application/json` 99 | * **Body** 100 | * `{ "voice": "google_speech", "textToSpeech": "hello world", "language": "en", "speed": "1" }` 101 | 102 | For example, using `curl`: 103 | 104 | ```sh 105 | # Play audio 106 | curl http://localhost:3000 \ 107 | -d '{ "voice": "google_speech", "textToSpeech": "hello world", "language": "en", "speed": "1" }' \ 108 | -H 'Content-Type: application/json' 109 | 110 | # Download audio file 111 | curl http://localhost:3000 \ 112 | -d '{ "voice": "gtts_file", "textToSpeech": "hello world", "language": "en", "speed": "1" }' \ 113 | -H 'Content-Type: application/json' \ 114 | -o 'output.mp3' 115 | ``` 116 | 117 | ## Available TTS engines 118 | 119 | ### GoogleSpeech engine 120 | 121 | Google Speech is a simple multiplatform command line tool to read text using Google Translate TTS (Text To Speech) API. 122 | 123 | You need to be online to communicate with Google servers. 124 | 125 | Learn more at <https://github.com/desbma/GoogleSpeech> 126 | 127 | ### gTTS engine 128 | 129 | Google Text-to-Speech (gTTS) is a Python library and CLI tool to interface with Google Translate's text-to-speech API. 130 | 131 | You need to be online to communicate with Google servers. 
132 | 133 | Learn more at 134 | 135 | ### Festival engine 136 | 137 | Festival is a free software multi-lingual speech synthesis workbench that runs on multiple-platforms offering black box text to speech, as well as an open architecture for research in speech synthesis. 138 | 139 | It works offline. 140 | 141 | Learn more at and 142 | 143 | ### eSpeak engine 144 | 145 | eSpeak is a compact open source software speech synthesizer for English and other languages, for Linux and Windows. 146 | 147 | It works offline. 148 | 149 | Learn more at 150 | 151 | ## License 152 | 153 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) 154 | 155 | This project is released under the [MIT License](LICENSE). 156 | -------------------------------------------------------------------------------- /app/cmd.js: -------------------------------------------------------------------------------- 1 | function getCmdWithArgs(fields) { 2 | 3 | var voice = fields.voice; 4 | 5 | if (voice === 'google_speech') { 6 | return getGoogleSpeechCmdWithArgs(fields); 7 | } else if (voice === 'google_speech_file') { 8 | return getGoogleSpeechFileCmdWithArgs(fields); 9 | } else if (voice === 'gtts') { 10 | return getGttsCmdWithArgs(fields); 11 | } else if (voice === 'gtts_file') { 12 | return getGttsFileCmdWithArgs(fields); 13 | } else if (voice === 'festival') { 14 | return getFestivalCmdWithArgs(fields); 15 | } else if (voice === 'espeak') { 16 | return getEspeakCmdWithArgs(fields); 17 | } else if (voice === 'espeak_file') { 18 | return getEspeakFileCmdWithArgs(fields); 19 | } 20 | } 21 | 22 | function getGoogleSpeechCmdWithArgs(fields) { 23 | 24 | var text = fields.textToSpeech, 25 | language = fields.language, 26 | soxArgs = getSoxEffectsArgs(fields); 27 | 28 | var args = [ 29 | '-l', language, 30 | text, 31 | '-v', 'warning' 32 | ]; 33 | 34 | if (soxArgs.length) { 35 | args.push('-e'); 36 | args = args.concat(soxArgs); 37 | } 38 | 39 | return { 40 | cmd: 'google_speech', 
41 | args: args 42 | }; 43 | } 44 | 45 | function getGoogleSpeechFileCmdWithArgs(fields) { 46 | 47 | var text = fields.textToSpeech, 48 | language = fields.language, 49 | outputPath = getAudioFilePath('mp3'); 50 | 51 | var args = [ 52 | '-l', language, 53 | text, 54 | '-v', 'warning', 55 | '-o', outputPath 56 | ]; 57 | 58 | return { 59 | cmd: 'google_speech', 60 | args: args, 61 | file: outputPath 62 | }; 63 | } 64 | 65 | function getGttsCmdWithArgs(fields) { 66 | 67 | var text = fields.textToSpeech, 68 | language = fields.language, 69 | slowReadingParam = fields.slowReading ? '-s' : null, 70 | soxArgs = getSoxEffectsArgs(fields); 71 | 72 | var args0 = [ 73 | '-l', language, 74 | '--nocheck', 75 | text 76 | ]; 77 | 78 | if (slowReadingParam) { 79 | args0.unshift(slowReadingParam); 80 | } 81 | 82 | var args1 = [ 83 | '-q', 84 | '-t', 'mp3', 85 | '-' 86 | ]; 87 | 88 | if (soxArgs.length) { 89 | args1 = args1.concat(soxArgs); 90 | } 91 | 92 | return [{ 93 | cmd: 'gtts-cli', 94 | args: args0 95 | },{ 96 | cmd: 'play', 97 | args: args1 98 | }]; 99 | } 100 | 101 | function getGttsFileCmdWithArgs(fields) { 102 | 103 | var text = fields.textToSpeech, 104 | language = fields.language, 105 | slowReadingParam = fields.slowReading ? 
'-s' : null, 106 | soxArgs = getSoxEffectsArgs(fields), 107 | outputPath = getAudioFilePath('mp3'); 108 | 109 | var args0 = [ 110 | '-l', language, 111 | '--nocheck', 112 | text 113 | ]; 114 | 115 | if (slowReadingParam) { 116 | args0.unshift(slowReadingParam); 117 | } 118 | 119 | var args1 = [ 120 | '-q', 121 | '-t', 'mp3', 122 | '-', 123 | outputPath 124 | ]; 125 | 126 | if (soxArgs.length) { 127 | args1 = args1.concat(soxArgs); 128 | } 129 | 130 | return [{ 131 | cmd: 'gtts-cli', 132 | args: args0 133 | },{ 134 | cmd: 'sox', 135 | args: args1, 136 | file: outputPath 137 | }]; 138 | } 139 | 140 | function getFestivalCmdWithArgs(fields) { 141 | 142 | var text = fields.textToSpeech, 143 | language = fields.language; 144 | 145 | return [{ 146 | cmd: 'echo', 147 | args: [ 148 | text 149 | ] 150 | },{ 151 | cmd: 'festival', 152 | args: [ 153 | '--tts', 154 | '--language', language, 155 | '--heap', '1000000' 156 | ] 157 | }]; 158 | } 159 | 160 | function getEspeakCmdWithArgs(fields) { 161 | 162 | var text = fields.textToSpeech, 163 | language = fields.language, 164 | voiceCode = fields.voiceCode || '', 165 | soxArgs = getSoxEffectsArgs(fields), 166 | voice = language; 167 | 168 | if (voiceCode) { 169 | voice += '+' + voiceCode; 170 | } 171 | 172 | var args0 = [ 173 | '-v', voice, 174 | '--stdout', 175 | text 176 | ]; 177 | 178 | var args1 = [ 179 | '-q', 180 | '-t', 'wav', 181 | '-' 182 | ]; 183 | 184 | if (soxArgs.length) { 185 | args1 = args1.concat(soxArgs); 186 | } 187 | 188 | return [{ 189 | cmd: 'espeak', 190 | args: args0 191 | },{ 192 | cmd: 'play', 193 | args: args1 194 | }]; 195 | } 196 | 197 | function getEspeakFileCmdWithArgs(fields) { 198 | 199 | var text = fields.textToSpeech, 200 | language = fields.language, 201 | voiceCode = fields.voiceCode || '', 202 | soxArgs = getSoxEffectsArgs(fields), 203 | voice = language, 204 | outputPath = getAudioFilePath('mp3'); 205 | 206 | if (voiceCode) { 207 | voice += '+' + voiceCode; 208 | } 209 | 210 | var args0 = [ 
211 | '-v', voice, 212 | '--stdout', 213 | text 214 | ]; 215 | 216 | var args1 = [ 217 | '-q', 218 | '-t', 'wav', 219 | '-', 220 | '-t', 'mp3', 221 | outputPath 222 | ]; 223 | 224 | if (soxArgs.length) { 225 | args1 = args1.concat(soxArgs); 226 | } 227 | 228 | return [{ 229 | cmd: 'espeak', 230 | args: args0 231 | },{ 232 | cmd: 'sox', 233 | args: args1, 234 | file: outputPath 235 | }]; 236 | } 237 | 238 | function getAudioFilePath(extension) { 239 | 240 | var fileName = Math.random().toString(35).substring(2, 10), 241 | workingDirectory = 'audio'; 242 | 243 | return workingDirectory + '/' + fileName + '.' + extension; 244 | } 245 | 246 | function getSoxEffectsArgs(fields) { 247 | 248 | var availableParametrizedEffects = ['speed', 'pitch', 'tempo', 'gain', 'delay'], 249 | availableUnaryEffects = ['reverse', 'reverb'], 250 | args = [], 251 | i, effectName, effectValue; 252 | 253 | for (i = 0; i < availableParametrizedEffects.length; i++) { 254 | effectName = availableParametrizedEffects[i]; 255 | effectValue = fields[effectName]; 256 | 257 | if (effectValue !== undefined) { 258 | args.push(effectName, effectValue); 259 | } 260 | } 261 | 262 | for (i = 0; i < availableUnaryEffects.length; i++) { 263 | effectName = availableUnaryEffects[i]; 264 | effectValue = fields[effectName]; 265 | 266 | if (effectValue) { 267 | args.push(effectName); 268 | } 269 | } 270 | 271 | if (fields.robot) { 272 | args = args.concat(getSoxRobotVoiceArgs()); 273 | } 274 | 275 | return args; 276 | } 277 | 278 | function getSoxRobotVoiceArgs() { 279 | 280 | return [ 281 | 'overdrive', '10', 282 | 'echo', '0.8', '0.8', '5', '0.7', 283 | 'echo', '0.8', '0.7', '6', '0.7', 284 | 'echo', '0.8', '0.7', '10', '0.7', 285 | 'echo', '0.8', '0.7', '12', '0.7', 286 | 'echo', '0.8', '0.88', '12', '0.7', 287 | 'echo', '0.8', '0.88', '30', '0.7', 288 | 'echo', '0.6', '0.6', '60', '0.7', 289 | 'gain', '8' 290 | ]; 291 | } 292 | 293 | module.exports = { 294 | getCmdWithArgs: getCmdWithArgs 295 | }; 296 | 
--------------------------------------------------------------------------------