├── .github
    ├── FUNDING.yml
    └── workflows
    │   └── gitlab-mirror.yml
├── .npmignore
├── .gitignore
├── deploy
    ├── .env
    ├── config
    │   └── Caddyfile
    ├── compose.tts-api.yaml
    └── compose.caddy.yaml
├── views
    ├── index.pug
    ├── settings.pug
    └── form.pug
├── package.json
├── LICENSE
├── .gitlab-ci.yml
├── js
    └── form.js
├── app
    ├── index.js
    └── cmd.js
└── README.md


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: pedroetb
2 | 


--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | .*
2 | deploy
3 | Dockerfile
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | npm-debug.log
3 | *.tgz
4 | 


--------------------------------------------------------------------------------
/deploy/.env:
--------------------------------------------------------------------------------
1 | PORT=3000
2 | CONTAINER_NAME=tts-api
3 | CADDY_PORT=2015
4 | TRAEFIK_DOMAIN=change.me
5 | 
6 | TTS_API_NET_NAME=tts-api-net
7 | TTS_API_NET_DRIVER=overlay
8 | 


--------------------------------------------------------------------------------
/deploy/config/Caddyfile:
--------------------------------------------------------------------------------
 1 | {
 2 | 	admin off
 3 | 	auto_https off
 4 | 	log {
 5 | 		format console {
 6 | 			time_format iso8601
 7 | 		}
 8 | 	}
 9 | }
10 | 
11 | :{$CADDY_PORT} {
12 | 	reverse_proxy {$CONTAINER_NAME}:{$PORT}
13 | 	respond /health "OK" 200
14 | }
15 | 


--------------------------------------------------------------------------------
/deploy/compose.tts-api.yaml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   tts-api:
 3 |     image: ${IMAGE_NAME:-pedroetb/tts-api}:${IMAGE_TAG:-latest}
 4 |     container_name: ${CONTAINER_NAME}
 5 |     environment:
 6 |       PORT:
 7 |     networks:
 8 |       tts-api-net:
 9 |     devices:
10 |       - ${AUDIO_DEVICE:-/dev/snd}
11 |     restart: always
12 |     cpus: '${CPUS:-0.5}'
13 |     mem_limit: ${MEM_LIMIT:-128M}
14 |     mem_reservation: ${MEM_RESERVATION:-64M}
15 | 
16 | networks:
17 |   tts-api-net:
18 |     name: ${TTS_API_NET_NAME}
19 |     driver: ${TTS_API_NET_DRIVER}
20 |     external: true
21 | 


--------------------------------------------------------------------------------
/views/index.pug:
--------------------------------------------------------------------------------
 1 | doctype html
 2 | 
 3 | include settings
 4 | 
 5 | html
 6 | 	head
 7 | 		title TTS-API
 8 | 		link(rel='stylesheet' href='/css/bootstrap/bootstrap.min.css')
 9 | 		link(rel='stylesheet' href='/css/alertify/alertify.min.css')
10 | 		link(rel='stylesheet' href='/css/alertify/default.min.css')
11 | 		script(src='/js/alertify/alertify.min.js')
12 | 		script(src='/js/form.js')
13 | 
14 | 	body(onload=`onVoiceChange(${JSON.stringify(voices[0])})`)
15 | 		div(
16 | 			style='padding-top: 15px; padding-bottom: 15px'
17 | 		).container
18 | 			div.row
19 | 				div.col-12
20 | 					h1 TTS-API
21 | 
22 | 			div.row
23 | 				div.col-12
24 | 					include form
25 | 


--------------------------------------------------------------------------------
/.github/workflows/gitlab-mirror.yml:
--------------------------------------------------------------------------------
 1 | name: Mirror and run GitLab CI
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |     - uses: actions/checkout@v1
10 |     - name: Mirror + trigger CI
11 |       uses: SvanBoxel/gitlab-mirror-and-ci-action@master
12 |       with:
13 |         args: "https://gitlab.com/pedroetb-projects/tts-api.git/"
14 |       env:
15 |         FOLLOW_TAGS: "true"
16 |         FORCE_PUSH: "false"
17 |         GITLAB_HOSTNAME: "gitlab.com"
18 |         GITLAB_USERNAME: "pedroetb"
19 |         GITLAB_PASSWORD: ${{ secrets.GITLAB_PASSWORD }}
20 |         GITLAB_PROJECT_ID: "16939539"
21 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
22 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tts-api",
 3 |   "version": "2.6.1",
 4 |   "description": "Text to speech REST API for multiple TTS engines",
 5 |   "keywords": [
 6 |     "text-to-speech",
 7 |     "tts",
 8 |     "speech",
 9 |     "voice",
10 |     "rest-api",
11 |     "api"
12 |   ],
13 |   "homepage": "https://github.com/pedroetb/tts-api",
14 |   "license": "MIT",
15 |   "author": "Pedro Trujillo",
16 |   "contributors": [
17 |     {
18 |       "name": "Pedro Trujillo",
19 |       "email": "pedroetb@gmail.com"
20 |     }
21 |   ],
22 |   "main": "app",
23 |   "repository": {
24 |     "type": "git",
25 |     "url": "https://github.com/pedroetb/tts-api.git"
26 |   },
27 |   "scripts": {
28 |     "start": "node app"
29 |   },
30 |   "dependencies": {
31 |     "alertifyjs": "^1.13.1",
32 |     "body-parser": "^1.20.2",
33 |     "bootstrap": "^4.6.2",
34 |     "express": "^4.18.3",
35 |     "pug": "^3.0.2"
36 |   },
37 |   "files": [
38 |     "app",
39 |     "views",
40 |     "js"
41 |   ],
42 |   "engines": {
43 |     "node": ">=18.19.1",
44 |     "npm": ">=10.2.4"
45 |   }
46 | }
47 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Pedro Trujillo (pedroetb)
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
 1 | include:
 2 |   - project: 'pedroetb-projects/gitlab-ci-templates'
 3 |     ref: master
 4 |     file: '/packaging-docker/docker-build.yml'
 5 |   - project: 'pedroetb-projects/gitlab-ci-templates'
 6 |     ref: master
 7 |     file: '/deployment-service/custom-image.yml'
 8 |   - project: 'pedroetb-projects/gitlab-ci-templates'
 9 |     ref: master
10 |     file: '/deployment-service/lulu/docker-deploy.yml'
11 | 
12 | stages:
13 |   - pre-package
14 |   - package
15 |   - post-package
16 |   - deploy
17 | 
18 | .deploy:
19 |   variables:
20 |     FORCE_DOCKER_COMPOSE: 1
21 |     COMPOSE_FILE: compose.tts-api.yaml
22 |     SERVICES_TO_CHECK: tts-api
23 | 
24 | .deploy-lulu:
25 |   environment:
26 |     name: lulu/tts-api
27 | 
28 | .deploy-caddy:
29 |   variables: &deploy-caddy-variables
30 |     FORCE_DOCKER_COMPOSE: 0
31 |     COMPOSE_FILE: compose.caddy.yaml
32 |     SERVICES_TO_CHECK: ${CI_PROJECT_NAME}_caddy
33 | 
34 | .deploy-lulu-caddy:
35 |   environment: &deploy-lulu-caddy-environment
36 |     name: lulu/caddy
37 | 
38 | deploy-branch-lulu-caddy:
39 |   extends: deploy-branch-lulu
40 |   variables:
41 |     <<: *deploy-caddy-variables
42 |   environment: *deploy-lulu-caddy-environment
43 | 
44 | deploy-tag-lulu-caddy:
45 |   extends: deploy-tag-lulu
46 |   variables:
47 |     <<: *deploy-caddy-variables
48 |   environment: *deploy-lulu-caddy-environment
49 | 


--------------------------------------------------------------------------------
/deploy/compose.caddy.yaml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   caddy:
 3 |     image: ${CADDY_IMAGE_NAME:-caddy}:${CADDY_IMAGE_TAG:-latest}
 4 |     environment:
 5 |       CADDY_PORT:
 6 |       CONTAINER_NAME:
 7 |       PORT:
 8 |     networks:
 9 |       tts-api-net:
10 |       traefik-net:
11 |     configs:
12 |       - source: caddyfile-config
13 |         target: /etc/caddy/Caddyfile
14 |     healthcheck:
15 |       test: wget --spider -q http://localhost:${CADDY_PORT}/health
16 |       interval: ${HEALTHCHECK_INTERVAL:-30s}
17 |       timeout: ${HEALTHCHECK_TIMEOUT:-15s}
18 |       retries: ${HEALTHCHECK_RETRIES:-10}
19 |       start_period: ${HEALTHCHECK_START_PERIOD:-30s}
20 |     deploy:
21 |       mode: replicated
22 |       replicas: ${REPLICAS:-1}
23 |       restart_policy:
24 |         delay: ${RESTART_DELAY:-3s}
25 |       update_config:
26 |         delay: ${UPDATE_DELAY:-30s}
27 |       labels:
28 |         traefik.enable: ${TRAEFIK_ENABLE:-true}
29 |         traefik.http.routers.tts-api.entrypoints: ${TRAEFIK_ENTRYPOINT:-https}
30 |         traefik.http.routers.tts-api.rule: Host(`${TRAEFIK_SUBDOMAIN:-tts}.${TRAEFIK_DOMAIN}`)
31 |         traefik.http.services.tts-api.loadbalancer.server.port: ${CADDY_PORT}
32 |       resources:
33 |         limits:
34 |           cpus: '${RESOURCES_LIMITS_CPUS:-0.1}'
35 |           memory: ${RESOURCES_LIMITS_MEMORY:-64M}
36 |         reservations:
37 |           cpus: '${RESOURCES_RESERVATIONS_CPUS:-0.001}'
38 |           memory: ${RESOURCES_RESERVATIONS_MEMORY:-32M}
39 | 
40 | networks:
41 |   tts-api-net:
42 |     name: ${TTS_API_NET_NAME}
43 |     driver: ${TTS_API_NET_DRIVER}
44 |     attachable: true
45 | 
46 |   traefik-net:
47 |     name: ${TRAEFIK_NET_NAME:-traefik-net}
48 |     driver: ${TRAEFIK_NET_DRIVER:-overlay}
49 |     external: true
50 | 
51 | configs:
52 |   caddyfile-config:
53 |     name: ${CADDYFILE_CONFIG_NAME:-tts-api-caddyfile}
54 |     file: ./config/Caddyfile
55 | 


--------------------------------------------------------------------------------
/js/form.js:
--------------------------------------------------------------------------------
 1 | var disabledInputIds;
 2 | 
 3 | function onVoiceChange(voiceItem) {
 4 | 
 5 | 	if (disabledInputIds) {
 6 | 		updateInputsDisabledState(disabledInputIds, false);
 7 | 		disabledInputIds = null;
 8 | 	}
 9 | 
10 | 	var incompatibleSettings = voiceItem.incompatibleSettings;
11 | 	if (incompatibleSettings && incompatibleSettings instanceof Array) {
12 | 		updateInputsDisabledState(incompatibleSettings, true);
13 | 		disabledInputIds = incompatibleSettings;
14 | 	}
15 | }
16 | 
/**
 * Sets or clears the `disabled` attribute on a list of inputs.
 *
 * Ids that do not match any element in the document are skipped, so a stale
 * or misspelled id cannot abort the update of the remaining inputs.
 *
 * @param {string[]} inputIds - Ids of the elements to update.
 * @param {boolean} disable - Truthy to disable the inputs, falsy to enable them.
 */
function updateInputsDisabledState(inputIds, disable) {

	for (var i = 0; i < inputIds.length; i++) {
		var input = document.getElementById(inputIds[i]);

		// getElementById returns null for unknown ids; do not dereference it.
		if (!input) {
			continue;
		}

		if (disable) {
			input.setAttribute('disabled', '');
		} else {
			input.removeAttribute('disabled');
		}
	}
}
28 | 
/**
 * Click handler of the submit button.
 *
 * When the first form of the document exists and passes validation, the
 * default submission is cancelled and the collected values are sent through
 * `submitData`. Otherwise nothing is done and the event keeps its default
 * behaviour.
 *
 * @param {Event} evt - Event received from the button.
 */
function onSubmit(evt) {

	var form = document.forms[0],
		canSubmit = !!form && form.checkValidity();

	if (canSubmit) {
		evt.preventDefault();
		submitData(getInputValues(form));
	}
}
42 | 
/**
 * Collects the values of the form controls into a plain object keyed by
 * control name.
 *
 * Each name is looked up through `form.elements[name]`. Names listed in the
 * module-level `disabledInputIds` are left out, as are controls whose name
 * does not resolve to an entry. Checkboxes contribute their `checked` flag,
 * everything else its `value`.
 *
 * @param {HTMLFormElement} form - Form whose controls are read.
 * @returns {Object} Map of control name to value.
 */
function getInputValues(form) {

	var controls = form.elements,
		collected = {};

	Array.prototype.forEach.call(controls, function(control) {

		var name = control.name;

		// A truthy value stored earlier for this name wins.
		if (collected[name]) {
			return;
		}

		var namedControl = controls[name],
			disabledByVoice = !!disabledInputIds && disabledInputIds.indexOf(name) !== -1;

		if (!namedControl || disabledByVoice) {
			return;
		}

		if (control.type === 'checkbox') {
			collected[name] = namedControl.checked;
		} else {
			collected[name] = namedControl.value;
		}
	});

	return collected;
}
70 | 
/**
 * Posts the form data as JSON to `/` and reports progress with alertify
 * notifications.
 *
 * A persistent "Speaking..." message is shown while the request is pending
 * and dismissed once it settles; the outcome is then reported as success,
 * speech error (non-ok response) or connection error (rejected request).
 *
 * @param {Object} data - Values to send, serialized with `JSON.stringify`.
 */
function submitData(data) {

	alertify.set('notifier', 'position', 'top-right');
	var speakingNotice = alertify.message('Speaking...', 0);

	function onResponse(res) {

		speakingNotice.dismiss();
		if (res.ok) {
			alertify.success('Speech done!');
		} else {
			alertify.error('Speech error!');
		}
	}

	function onFailure(err) {

		speakingNotice.dismiss();
		alertify.error('Connection error!');
	}

	var requestOptions = {
		method: 'post',
		headers: new Headers({
			'Content-Type': 'application/json'
		}),
		body: JSON.stringify(data)
	};

	fetch('/', requestOptions)
		.then(onResponse)
		.catch(onFailure);
}
97 | 


--------------------------------------------------------------------------------
/views/settings.pug:
--------------------------------------------------------------------------------
  1 | -
  2 | 	voice = {
  3 | 		name: 'voice',
  4 | 		label: 'Voice'
  5 | 	}
  6 | 
  7 | 	text = {
  8 | 		id: 'textToSpeech',
  9 | 		label: 'Text',
 10 | 		minLength: 1,
 11 | 		maxLength: 1000
 12 | 	}
 13 | 
 14 | 	language = {
 15 | 		id: 'language',
 16 | 		label: 'Language',
 17 | 		value: 'es',
 18 | 		minLength: 2,
 19 | 		maxLength: 50
 20 | 	}
 21 | 
 22 | 	voiceCode = {
 23 | 		id: 'voiceCode',
 24 | 		label: 'Voice code',
 25 | 		value: 'f4',
 26 | 		maxLength: 50
 27 | 	}
 28 | 
 29 | 	speed = {
 30 | 		id: 'speed',
 31 | 		label: 'Speed',
 32 | 		value: 1,
 33 | 		min: 0.4,
 34 | 		max: 2,
 35 | 		step: 0.01
 36 | 	}
 37 | 
 38 | 	pitch = {
 39 | 		id: 'pitch',
 40 | 		label: 'Pitch',
 41 | 		value: 1,
 42 | 		min: -2000,
 43 | 		max: 2000,
 44 | 		step: 1
 45 | 	}
 46 | 
 47 | 	tempo = {
 48 | 		id: 'tempo',
 49 | 		label: 'Tempo',
 50 | 		value: 1,
 51 | 		min: 0.1,
 52 | 		max: 5,
 53 | 		step: 0.01
 54 | 	}
 55 | 
 56 | 	gain = {
 57 | 		id: 'gain',
 58 | 		label: 'Gain',
 59 | 		value: 1,
 60 | 		min: -50,
 61 | 		max: 50,
 62 | 		step: 0.1
 63 | 	}
 64 | 
 65 | 	delay = {
 66 | 		id: 'delay',
 67 | 		label: 'Delay',
 68 | 		value: 0,
 69 | 		min: 0,
 70 | 		max: 10,
 71 | 		step: 0.1
 72 | 	}
 73 | 
 74 | 	modifiers = {
 75 | 		label: 'Modifiers'
 76 | 	}
 77 | 
 78 | 	slowReading = {
 79 | 		id: 'slowReading',
 80 | 		label: 'Slow reading'
 81 | 	}
 82 | 
 83 | 	reverse = {
 84 | 		id: 'reverse',
 85 | 		label: 'Reverse'
 86 | 	}
 87 | 
 88 | 	reverb = {
 89 | 		id: 'reverb',
 90 | 		label: 'Reverb'
 91 | 	}
 92 | 
 93 | 	robot = {
 94 | 		id: 'robot',
 95 | 		label: 'Robot'
 96 | 	}
 97 | 
 98 | 	voices = [{
 99 | 		value: 'google_speech',
100 | 		label: 'GoogleSpeech',
101 | 		incompatibleSettings: [
102 | 			voiceCode.id,
103 | 			slowReading.id
104 | 		]
105 | 	},{
106 | 		value: 'google_speech_file',
107 | 		label: 'GoogleSpeech (file)',
108 | 		incompatibleSettings: [
109 | 			voiceCode.id,
110 | 			speed.id,
111 | 			pitch.id,
112 | 			tempo.id,
113 | 			gain.id,
114 | 			delay.id,
115 | 			slowReading.id,
116 | 			reverse.id,
117 | 			reverb.id,
118 | 			robot.id
119 | 		]
120 | 	},{
121 | 		value: 'gtts',
122 | 		label: 'gTTS',
123 | 		incompatibleSettings: [
124 | 			voiceCode.id
125 | 		]
126 | 	},{
127 | 		value: 'gtts_file',
128 | 		label: 'gTTS (file)',
129 | 		incompatibleSettings: [
130 | 			voiceCode.id
131 | 		]
132 | 	},{
133 | 		value: 'festival',
134 | 		label: 'Festival',
135 | 		incompatibleSettings: [
136 | 			voiceCode.id,
137 | 			speed.id,
138 | 			pitch.id,
139 | 			tempo.id,
140 | 			gain.id,
141 | 			delay.id,
142 | 			slowReading.id,
143 | 			reverse.id,
144 | 			reverb.id,
145 | 			robot.id
146 | 		]
147 | 	},{
148 | 		value: 'espeak',
149 | 		label: 'eSpeak',
150 | 		incompatibleSettings: [
151 | 			slowReading.id
152 | 		]
153 | 	},{
154 | 		value: 'espeak_file',
155 | 		label: 'eSpeak (file)',
156 | 		incompatibleSettings: [
157 | 			slowReading.id
158 | 		]
159 | 	}]
160 | 


--------------------------------------------------------------------------------
/views/form.pug:
--------------------------------------------------------------------------------
  1 | form
  2 | 	div.form-group
  3 | 		label #{voice.label}
  4 | 		div.form-check
  5 | 			each voiceItem, index in voices
  6 | 				div.form-check-inline
  7 | 					label.form-check-label
  8 | 						input.form-check-input(
  9 | 							type='radio'
 10 | 							name=voice.name
 11 | 							value=voiceItem.value
 12 | 							checked=(!index ? true : false)
 13 | 							onchange=`onVoiceChange(${JSON.stringify(voiceItem)})`
 14 | 						)
 15 | 						span #{voiceItem.label}
 16 | 
 17 | 	div.form-group
 18 | 		label(for=text.id) #{text.label}
 19 | 		textarea(
 20 | 			id=text.id
 21 | 			type='text'
 22 | 			name=text.id
 23 | 			placeholder=text.label
 24 | 			autofocus
 25 | 			minlength=text.minLength
 26 | 			maxlength=text.maxLength
 27 | 			required
 28 | 		).form-control
 29 | 
 30 | 	div.form-group
 31 | 		label(for=language.id) #{language.label}
 32 | 		input(
 33 | 			id=language.id
 34 | 			type='text'
 35 | 			name=language.id
 36 | 			placeholder=language.label
 37 | 			value=language.value
 38 | 			minlength=language.minLength
 39 | 			maxlength=language.maxLength
 40 | 			required
 41 | 		).form-control
 42 | 
 43 | 	div.form-group
 44 | 		label(for=voiceCode.id) #{voiceCode.label}
 45 | 		input(
 46 | 			id=voiceCode.id
 47 | 			type='text'
 48 | 			name=voiceCode.id
 49 | 			placeholder=voiceCode.label
 50 | 			value=voiceCode.value
 51 | 			maxlength=voiceCode.maxLength
 52 | 		).form-control
 53 | 
 54 | 	div.form-group
 55 | 		label(for=speed.id) #{speed.label}
 56 | 		input(
 57 | 			id=speed.id
 58 | 			type='number'
 59 | 			name=speed.id
 60 | 			placeholder=speed.label
 61 | 			value=speed.value
 62 | 			min=speed.min
 63 | 			max=speed.max
 64 | 			step=speed.step
 65 | 		).form-control
 66 | 
 67 | 	div.form-group
 68 | 		label(for=pitch.id) #{pitch.label}
 69 | 		input(
 70 | 			id=pitch.id
 71 | 			type='number'
 72 | 			name=pitch.id
 73 | 			placeholder=pitch.label
 74 | 			value=pitch.value
 75 | 			min=pitch.min
 76 | 			max=pitch.max
 77 | 			step=pitch.step
 78 | 		).form-control
 79 | 
 80 | 	div.form-group
 81 | 		label(for=tempo.id) #{tempo.label}
 82 | 		input(
 83 | 			id=tempo.id
 84 | 			type='number'
 85 | 			name=tempo.id
 86 | 			placeholder=tempo.label
 87 | 			value=tempo.value
 88 | 			min=tempo.min
 89 | 			max=tempo.max
 90 | 			step=tempo.step
 91 | 		).form-control
 92 | 
 93 | 	div.form-group
 94 | 		label(for=gain.id) #{gain.label}
 95 | 		input(
 96 | 			id=gain.id
 97 | 			type='number'
 98 | 			name=gain.id
 99 | 			placeholder=gain.label
100 | 			value=gain.value
101 | 			min=gain.min
102 | 			max=gain.max
103 | 			step=gain.step
104 | 		).form-control
105 | 
106 | 	div.form-group
107 | 		label(for=delay.id) #{delay.label}
108 | 		input(
109 | 			id=delay.id
110 | 			type='number'
111 | 			name=delay.id
112 | 			placeholder=delay.label
113 | 			value=delay.value
114 | 			min=delay.min
115 | 			max=delay.max
116 | 			step=delay.step
117 | 		).form-control
118 | 
119 | 	div.form-group
120 | 		label #{modifiers.label}
121 | 		div.form-check
122 | 			div.form-check-inline
123 | 				label.form-check-label
124 | 					input.form-check-input(
125 | 						id=slowReading.id
126 | 						type='checkbox'
127 | 						name=slowReading.id
128 | 					)
129 | 					span #{slowReading.label}
130 | 
131 | 			div.form-check-inline
132 | 				label.form-check-label
133 | 					input.form-check-input(
134 | 						id=reverse.id
135 | 						type='checkbox'
136 | 						name=reverse.id
137 | 					)
138 | 					span #{reverse.label}
139 | 
140 | 			div.form-check-inline
141 | 				label.form-check-label
142 | 					input.form-check-input(
143 | 						id=reverb.id
144 | 						type='checkbox'
145 | 						name=reverb.id
146 | 					)
147 | 					span #{reverb.label}
148 | 
149 | 			div.form-check-inline
150 | 				label.form-check-label
151 | 					input.form-check-input(
152 | 						id=robot.id
153 | 						type='checkbox'
154 | 						name=robot.id
155 | 					)
156 | 					span #{robot.label}
157 | 
158 | 	button(
159 | 		onclick='onSubmit(event)'
160 | 	).btn.btn-primary
161 | 		span Submit
162 | 


--------------------------------------------------------------------------------
/app/index.js:
--------------------------------------------------------------------------------
  1 | var util = require('util'),
  2 | 	childProcess = require('child_process'),
  3 | 	express = require('express'),
  4 | 	bodyParser = require('body-parser'),
  5 | 
  6 | 	cmd = require('./cmd'),
  7 | 
  8 | 	server = express(),
  9 | 	port = process.env.PORT || 3000;
 10 | 
 11 | server.set('view engine', 'pug')
 12 | 	.use(bodyParser.json())
 13 | 
 14 | 	.use('/css/bootstrap', express.static(__dirname + '/../node_modules/bootstrap/dist/css'))
 15 | 	.use('/css/alertify', express.static(__dirname + '/../node_modules/alertifyjs/build/css'))
 16 | 	.use('/css/alertify', express.static(__dirname + '/../node_modules/alertifyjs/build/css/themes'))
 17 | 	.use('/js', express.static(__dirname + '/../js'))
 18 | 	.use('/js/alertify', express.static(__dirname + '/../node_modules/alertifyjs/build'))
 19 | 
 20 | 	.get('/', renderForm)
 21 | 	.post('/', processData)
 22 | 
 23 | 	.listen(port, function() {
 24 | 
 25 | 		console.log('Listening at port', this.address().port);
 26 | 	});
 27 | 
 28 | function renderForm(req, res) {
 29 | 
 30 | 	res.render('index');
 31 | }
 32 | 
 33 | function processData(req, res) {
 34 | 
 35 | 	var body = req.body,
 36 | 		cmdWithArgs = cmd.getCmdWithArgs(body) || {},
 37 | 		httpArgs = {
 38 | 			res: res,
 39 | 			fields: body
 40 | 		};
 41 | 
 42 | 	if (cmdWithArgs instanceof Array) {
 43 | 		runSpeechProcessChain(cmdWithArgs, httpArgs);
 44 | 	} else {
 45 | 		runLastSpeechProcess(cmdWithArgs, httpArgs);
 46 | 	}
 47 | }
 48 | 
 49 | function runLastSpeechProcess(cmdWithArgs, httpArgs) {
 50 | 
 51 | 	var speechProcess = runSpeechProcess(cmdWithArgs);
 52 | 
 53 | 	speechProcess.on('error', onLastSpeechError.bind(this, httpArgs));
 54 | 	speechProcess.on('close', onLastSpeechClose);
 55 | 	speechProcess.on('exit', onLastSpeechExit.bind(this, cmdWithArgs, httpArgs));
 56 | 
 57 | 	return speechProcess;
 58 | }
 59 | 
 60 | function runSpeechProcess(cmdWithArgs) {
 61 | 
 62 | 	var newProcess = childProcess.spawn(cmdWithArgs.cmd, cmdWithArgs.args);
 63 | 
 64 | 	newProcess.stderr.on('data', onSpeechStandardError);
 65 | 
 66 | 	return newProcess;
 67 | }
 68 | 
 69 | function onSpeechStandardError(buffer) {
 70 | 
 71 | 	console.error('[stderr]:', buffer.toString('utf8'));
 72 | }
 73 | 
 74 | function runSpeechProcessChain(cmdWithArgs, httpArgs) {
 75 | 
 76 | 	var speechProcs = {};
 77 | 
 78 | 	for (var i = 0; i < cmdWithArgs.length; i++) {
 79 | 		if (i !== cmdWithArgs.length - 1) {
 80 | 			var getNextProcessCbk = getNextSpeechProcess.bind(speechProcs, i + 1);
 81 | 			speechProcs[i] = runIntermediateSpeechProcess(cmdWithArgs[i], getNextProcessCbk);
 82 | 		} else {
 83 | 			speechProcs[i] = runLastSpeechProcess(cmdWithArgs[i], httpArgs);
 84 | 		}
 85 | 	}
 86 | }
 87 | 
 88 | function runIntermediateSpeechProcess(cmdWithArgs, procArgs) {
 89 | 
 90 | 	var speechProcess = runSpeechProcess(cmdWithArgs);
 91 | 
 92 | 	speechProcess.stdout.on('data', onIntermediateSpeechStandardOutput.bind(this, procArgs));
 93 | 	speechProcess.on('error', onIntermediateSpeechError);
 94 | 	speechProcess.on('close', onIntermediateSpeechClose.bind(this, procArgs));
 95 | 
 96 | 	return speechProcess;
 97 | }
 98 | 
 99 | function getNextSpeechProcess(nextIndex) {
100 | 
101 | 	return this[nextIndex];
102 | }
103 | 
/**
 * Forwards a chunk written by an intermediate process to the standard input
 * of the next process of the chain, as long as that stream is writable.
 *
 * @param {Function} getNextProc - Callback returning the next process.
 * @param {*} data - Chunk to forward.
 */
function onIntermediateSpeechStandardOutput(getNextProc, data) {

	var nextInput = getNextProc().stdin;

	if (!nextInput.writable) {
		return;
	}

	nextInput.write(data);
}
113 | 
/**
 * Runs when an intermediate process closes: logs a truthy exit code and ends
 * the standard input of the next process of the chain.
 *
 * @param {Function} getNextProc - Callback returning the next process.
 * @param {?number} code - Exit code reported by the closed process.
 */
function onIntermediateSpeechClose(getNextProc, code) {

	var nextInput = getNextProc().stdin;

	if (code) {
		console.error('[intermediate exit code]:', code);
	}

	nextInput.end();
}
125 | 
/**
 * Logs an error emitted by an intermediate process of a chain.
 *
 * @param {*} err - Error to log, formatted with `util.inspect`.
 */
function onIntermediateSpeechError(err) {

	var inspectedError = util.inspect(err);

	console.error('[intermediate error]:', inspectedError);
}
130 | 
/**
 * Runs when the last process of a request closes; logs its exit code when it
 * is truthy.
 *
 * @param {?number} code - Exit code reported by the closed process.
 */
function onLastSpeechClose(code) {

	if (!code) {
		return;
	}

	console.error('[exit code]:', code);
}
137 | 
/**
 * Answers the HTTP request once the last speech process has exited.
 *
 * It is registered as a listener of the child process `'exit'` event, which
 * provides the exit code and, when the process was terminated by a signal,
 * the signal name (the exit code is then null). Both a non-zero exit code and
 * a termination by signal are reported as errors; otherwise the generated
 * file is sent when the command declares one, or the response is simply ended.
 *
 * @param {Object} cmdWithArgs - Command descriptor; its optional `file` is the
 *   path of the audio file to send back.
 * @param {Object} httpArgs - Holder of the response (`res`) and request fields.
 * @param {?number} err - Exit code of the process.
 * @param {?string} [signal] - Signal that terminated the process, if any.
 */
function onLastSpeechExit(cmdWithArgs, httpArgs, err, signal) {

	var res = httpArgs.res,
		filePath = cmdWithArgs.file,
		failure = err || signal;

	if (failure) {
		handleSpeechError(httpArgs, failure);
		return;
	}

	if (filePath) {
		res.download(filePath);
	} else {
		res.end();
	}
}
153 | 
/**
 * Listener for the `'error'` event of the last speech process; delegates to
 * `handleSpeechError`.
 *
 * @param {Object} args - Holder of the response and request fields.
 * @param {*} err - Error emitted by the process.
 */
function onLastSpeechError(args, err) {

	var httpArgs = args;

	handleSpeechError(httpArgs, err);
}
158 | 
/**
 * Reports a failed speech request: answers with a 500 plain-text response
 * describing the error and the received fields, and logs the same details.
 *
 * This function is reachable from more than one process event, so it may be
 * called after the response has already been started. Writing headers twice
 * would throw, so the response is only written while its headers are still
 * unsent; the details are logged in every case.
 *
 * @param {Object} args - Holder of the response (`res`) and request `fields`.
 * @param {*} err - Error (or exit code) to report.
 */
function handleSpeechError(args, err) {

	var res = args.res,
		fields = args.fields,
		errorHeaderMessage = '----[error]----',
		dataHeaderMessage = '-----[data]-----',
		inspectedError = util.inspect(err),
		inspectedFields = util.inspect(fields);

	if (!res.headersSent) {
		res.writeHead(500, {
			'Content-Type': 'text/plain; charset=utf-8'
		});

		res.write(errorHeaderMessage + '\n');
		res.write(inspectedError + '\n');
		res.write(dataHeaderMessage + '\n');
		res.write(inspectedFields + '\n');

		res.end();
	}

	console.error(errorHeaderMessage);
	console.error(inspectedError);
	console.error(dataHeaderMessage);
	console.error(inspectedFields);
}
184 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # TTS-API
  2 | 
  3 | Text to speech REST API for multiple TTS engines.
  4 | 
  5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
  6 | [![Sponsor](https://img.shields.io/badge/-Sponsor-fafbfc?logo=GitHub%20Sponsors)](https://github.com/sponsors/pedroetb)
  7 | 
  8 | You can send a text to be converted into audio, using different TTS engines and sound effects. Then, you will be listening to it on your local audio device, or receiving the resultant audio file.
  9 | 
 10 | ## Setup
 11 | 
 12 | First, you should install the supported TTS engines:
 13 | 
 14 | ### GoogleSpeech setup
 15 | 
 16 | ```sh
 17 | apt install python3 sox libsox-fmt-mp3
 18 | pip install google_speech
 19 | ```
 20 | 
 21 | ### gTTS setup
 22 | 
 23 | ```sh
 24 | apt install python3 sox libsox-fmt-mp3
 25 | pip install gTTS
 26 | ```
 27 | 
 28 | ### Festival setup
 29 | 
 30 | ```sh
 31 | apt install festival festvox-ellpc11k
 32 | ```
 33 | 
 34 | ### eSpeak setup
 35 | 
 36 | ```sh
 37 | apt install espeak
 38 | ```
 39 | 
 40 | You also need to install **nodejs** and **npm**, and then, simply run `npm install` and `npm start`.
 41 | The API should now be running at `http://localhost:3000`.
 42 | 
 43 | Or you can just use [pedroetb/tts-api](https://hub.docker.com/r/pedroetb/tts-api) **Docker** image, which already has all dependencies configured.
 44 | 
 45 | ## Setup using Docker
 46 | 
 47 | The only requirement is to have **Docker** installed. Then, you can run:
 48 | 
 49 | ```sh
 50 | docker run --rm -d --name tts-api --device /dev/snd -p 3000:3000 pedroetb/tts-api
 51 | ```
 52 | 
 53 | The API will be running and accessible at `http://localhost:3000`.
 54 | 
 55 | Alternatively, you can deploy it in a **Docker Swarm** cluster using `docker compose` (already included in Docker if using modern version) and `docker swarm` (create Swarm cluster first):
 56 | 
 57 | ```sh
 58 | cd deploy
 59 | 
 60 | # Deploy Caddy service
 61 | env $(grep -v '^[#| ]' .env | xargs) \
 62 |  TRAEFIK_DOMAIN=change.me \
 63 |  docker stack deploy \
 64 |  -c compose.caddy.yaml \
 65 |  tts-api
 66 | 
 67 | # Run TTS-API container
 68 | docker compose \
 69 |  -f compose.tts-api.yaml \
 70 |  -p tts-api \
 71 |  up -d
 72 | ```
 73 | 
 74 | The service is prepared to be reverse-proxied with **Traefik**, and accessible at `tts.${TRAEFIK_DOMAIN}` domain. How to run **Traefik** is not described here, check its [official site](https://traefik.io).
 75 | 
 76 | The proxy needs a little help from **Caddy**, because Docker Swarm is not compatible with devices configuration (required to use sound capabilities) and Traefik cannot work with Docker containers and Docker Swarm services all at once. This way, only the **Caddy** service is exposed through **Traefik**, and the `tts-api` container is only accessible through the reverse proxy provided by **Caddy** (the same way **Traefik** reverse-proxies to **Caddy**).
 77 | 
 78 | The Docker container and the Docker Swarm service can run on different hosts, because they communicate through a Docker overlay network. Run the `tts-api` Docker container on the host that has speakers, so you can listen to the speech.
 79 | 
 80 | Don't forget to edit `TRAEFIK_DOMAIN` environment variable before deploying.
 81 | 
 82 | ## Usage
 83 | 
 84 | When running, the API will receive POST requests at `http://localhost:3000`.
 85 | You can use your favourite REST client to send a request, or use the built-in form.
 86 | 
 87 | Both modes (playing or downloading audio) are available by choosing a different `voice` value (for example, `google_speech` to play the audio or `gtts_file` to download it); select one according to your needs.
 88 | 
 89 | ### Built-in form
 90 | 
 91 | Go to `http://localhost:3000` with your browser, fill the form with data and submit it. Just that.
 92 | 
 93 | ### Send POST request
 94 | 
 95 | You can send a POST request to `http://localhost:3000` following this scheme:
 96 | 
 97 | * **Headers**
 98 |   * **Content-Type**: `application/json`
 99 | * **Body**
100 |   * `{ "voice": "google_speech", "textToSpeech": "hello world", "language": "en", "speed": "1" }`
101 | 
102 | For example, using `curl`:
103 | 
104 | ```sh
105 | # Play audio
106 | curl http://localhost:3000 \
107 |  -d '{ "voice": "google_speech", "textToSpeech": "hello world", "language": "en", "speed": "1" }' \
108 |  -H 'Content-Type: application/json'
109 | 
110 | # Download audio file
111 | curl http://localhost:3000 \
112 |  -d '{ "voice": "gtts_file", "textToSpeech": "hello world", "language": "en", "speed": "1" }' \
113 |  -H 'Content-Type: application/json' \
114 |  -o 'output.mp3'
115 | ```
116 | 
117 | ## Available TTS engines
118 | 
119 | ### GoogleSpeech engine
120 | 
121 | Google Speech is a simple multiplatform command line tool to read text using Google Translate TTS (Text To Speech) API.
122 | 
123 | You need to be online to communicate with Google servers.
124 | 
125 | Learn more at <https://github.com/desbma/GoogleSpeech>
126 | 
127 | ### gTTS engine
128 | 
129 | Google Text-to-Speech (gTTS) is a Python library and CLI tool to interface with Google Translate's text-to-speech API.
130 | 
131 | You need to be online to communicate with Google servers.
132 | 
133 | Learn more at <https://github.com/pndurette/gTTS>
134 | 
135 | ### Festival engine
136 | 
137 | Festival is a free software multi-lingual speech synthesis workbench that runs on multiple-platforms offering black box text to speech, as well as an open architecture for research in speech synthesis.
138 | 
139 | It works offline.
140 | 
141 | Learn more at <http://www.cstr.ed.ac.uk/projects/festival/> and <http://festvox.org/festival/>
142 | 
143 | ### eSpeak engine
144 | 
145 | eSpeak is a compact open source software speech synthesizer for English and other languages, for Linux and Windows.
146 | 
147 | It works offline.
148 | 
149 | Learn more at <http://espeak.sourceforge.net/>
150 | 
151 | ## License
152 | 
153 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
154 | 
155 | This project is released under the [MIT License](LICENSE).
156 | 


--------------------------------------------------------------------------------
/app/cmd.js:
--------------------------------------------------------------------------------
  1 | function getCmdWithArgs(fields) {
  2 | 
  3 | 	var voice = fields.voice;
  4 | 
  5 | 	if (voice === 'google_speech') {
  6 | 		return getGoogleSpeechCmdWithArgs(fields);
  7 | 	} else if (voice === 'google_speech_file') {
  8 | 		return getGoogleSpeechFileCmdWithArgs(fields);
  9 | 	} else if (voice === 'gtts') {
 10 | 		return getGttsCmdWithArgs(fields);
 11 | 	} else if (voice === 'gtts_file') {
 12 | 		return getGttsFileCmdWithArgs(fields);
 13 | 	} else if (voice === 'festival') {
 14 | 		return getFestivalCmdWithArgs(fields);
 15 | 	} else if (voice === 'espeak') {
 16 | 		return getEspeakCmdWithArgs(fields);
 17 | 	} else if (voice === 'espeak_file') {
 18 | 		return getEspeakFileCmdWithArgs(fields);
 19 | 	}
 20 | }
 21 | 
 22 | function getGoogleSpeechCmdWithArgs(fields) {
 23 | 
 24 | 	var text = fields.textToSpeech,
 25 | 		language = fields.language,
 26 | 		soxArgs = getSoxEffectsArgs(fields);
 27 | 
 28 | 	var args = [
 29 | 		'-l', language,
 30 | 		text,
 31 | 		'-v', 'warning'
 32 | 	];
 33 | 
 34 | 	if (soxArgs.length) {
 35 | 		args.push('-e');
 36 | 		args = args.concat(soxArgs);
 37 | 	}
 38 | 
 39 | 	return {
 40 | 		cmd: 'google_speech',
 41 | 		args: args
 42 | 	};
 43 | }
 44 | 
 45 | function getGoogleSpeechFileCmdWithArgs(fields) {
 46 | 
 47 | 	var text = fields.textToSpeech,
 48 | 		language = fields.language,
 49 | 		outputPath = getAudioFilePath('mp3');
 50 | 
 51 | 	var args = [
 52 | 		'-l', language,
 53 | 		text,
 54 | 		'-v', 'warning',
 55 | 		'-o', outputPath
 56 | 	];
 57 | 
 58 | 	return {
 59 | 		cmd: 'google_speech',
 60 | 		args: args,
 61 | 		file: outputPath
 62 | 	};
 63 | }
 64 | 
 65 | function getGttsCmdWithArgs(fields) {
 66 | 
 67 | 	var text = fields.textToSpeech,
 68 | 		language = fields.language,
 69 | 		slowReadingParam = fields.slowReading ? '-s' : null,
 70 | 		soxArgs = getSoxEffectsArgs(fields);
 71 | 
 72 | 	var args0 = [
 73 | 		'-l', language,
 74 | 		'--nocheck',
 75 | 		text
 76 | 	];
 77 | 
 78 | 	if (slowReadingParam) {
 79 | 		args0.unshift(slowReadingParam);
 80 | 	}
 81 | 
 82 | 	var args1 = [
 83 | 		'-q',
 84 | 		'-t', 'mp3',
 85 | 		'-'
 86 | 	];
 87 | 
 88 | 	if (soxArgs.length) {
 89 | 		args1 = args1.concat(soxArgs);
 90 | 	}
 91 | 
 92 | 	return [{
 93 | 		cmd: 'gtts-cli',
 94 | 		args: args0
 95 | 	},{
 96 | 		cmd: 'play',
 97 | 		args: args1
 98 | 	}];
 99 | }
100 | 
/**
 * Build the two commands used to produce an mp3 file with gTTS: `gtts-cli`
 * followed by `sox` reading from `-` and writing to a generated path.
 * NOTE(review): presumably the caller pipes the first command into the second;
 * confirm against the code that consumes this array.
 *
 * @param {Object} fields - Request fields; reads `language`, `textToSpeech` and
 *   `slowReading`, and forwards the whole object to `getSoxEffectsArgs`.
 * @returns {Array<Object>} Command descriptors, in order; the second one also
 *   carries the output path in its `file` property.
 */
function getGttsFileCmdWithArgs(fields) {

	var effects = getSoxEffectsArgs(fields),
		target = getAudioFilePath('mp3'),
		synthArgs = ['-l', fields.language, '--nocheck', fields.textToSpeech],
		// Effects come last, after the output path.
		converterArgs = ['-q', '-t', 'mp3', '-', target].concat(effects);

	// The slow-reading flag goes in front of every other gtts-cli argument.
	if (fields.slowReading) {
		synthArgs = ['-s'].concat(synthArgs);
	}

	return [
		{ cmd: 'gtts-cli', args: synthArgs },
		{ cmd: 'sox', args: converterArgs, file: target }
	];
}
139 | 
/**
 * Build the two commands used to speak text with Festival: `echo` with the
 * text, followed by `festival --tts`.
 * NOTE(review): presumably the caller pipes the first command into the second;
 * confirm against the code that consumes this array.
 *
 * @param {Object} fields - Request fields; reads `textToSpeech` and `language`.
 * @returns {Array<{cmd: string, args: Array}>} Command descriptors, in order.
 */
function getFestivalCmdWithArgs(fields) {

	var textSource = {
		cmd: 'echo',
		args: [fields.textToSpeech]
	};

	var synthesizer = {
		cmd: 'festival',
		args: ['--tts', '--language', fields.language, '--heap', '1000000']
	};

	return [textSource, synthesizer];
}
159 | 
/**
 * Build the two commands used to speak text with eSpeak: `espeak --stdout`
 * followed by `play` reading wav data from `-`.
 * NOTE(review): presumably the caller pipes the first command into the second;
 * confirm against the code that consumes this array.
 *
 * @param {Object} fields - Request fields; reads `language`, `textToSpeech` and
 *   the optional `voiceCode`, and forwards the whole object to
 *   `getSoxEffectsArgs`.
 * @returns {Array<{cmd: string, args: Array}>} Command descriptors, in order.
 */
function getEspeakCmdWithArgs(fields) {

	var variant = fields.voiceCode || '',
		// The voice is the language, optionally suffixed with "+<voiceCode>".
		voiceSpec = variant ? fields.language + '+' + variant : fields.language,
		playerArgs = ['-q', '-t', 'wav', '-'].concat(getSoxEffectsArgs(fields));

	return [
		{ cmd: 'espeak', args: ['-v', voiceSpec, '--stdout', fields.textToSpeech] },
		{ cmd: 'play', args: playerArgs }
	];
}
196 | 
/**
 * Build the two commands used to produce an mp3 file with eSpeak:
 * `espeak --stdout` followed by `sox` reading wav data from `-` and writing
 * mp3 to a generated path.
 * NOTE(review): presumably the caller pipes the first command into the second;
 * confirm against the code that consumes this array.
 *
 * @param {Object} fields - Request fields; reads `language`, `textToSpeech` and
 *   the optional `voiceCode`, and forwards the whole object to
 *   `getSoxEffectsArgs`.
 * @returns {Array<Object>} Command descriptors, in order; the second one also
 *   carries the output path in its `file` property.
 */
function getEspeakFileCmdWithArgs(fields) {

	var variant = fields.voiceCode || '',
		effects = getSoxEffectsArgs(fields),
		// The voice is the language, optionally suffixed with "+<voiceCode>".
		voiceSpec = variant ? fields.language + '+' + variant : fields.language,
		target = getAudioFilePath('mp3'),
		// Effects come last, after the output type and path.
		converterArgs = ['-q', '-t', 'wav', '-', '-t', 'mp3', target].concat(effects);

	return [
		{ cmd: 'espeak', args: ['-v', voiceSpec, '--stdout', fields.textToSpeech] },
		{ cmd: 'sox', args: converterArgs, file: target }
	];
}
237 | 
/**
 * Generate a relative path under `audio/` with a random file name.
 *
 * @param {string} extension - File extension, without the leading dot.
 * @returns {string} Path of the form `audio/<random>.<extension>`.
 */
function getAudioFilePath(extension) {

	// Up to 8 base-35 characters taken from the fractional part of a random number.
	var randomToken = Math.random().toString(35).substring(2, 10);

	return 'audio/' + randomToken + '.' + extension;
}
245 | 
/**
 * Translate the effect-related request fields into a flat argument list.
 *
 * Effects that take a value (`speed`, `pitch`, `tempo`, `gain`, `delay`) are
 * emitted as name/value pairs whenever the field is not `undefined`; flag
 * effects (`reverse`, `reverb`) are emitted as a bare name when the field is
 * truthy. A truthy `robot` field appends the arguments returned by
 * `getSoxRobotVoiceArgs`.
 *
 * @param {Object} fields - Request fields.
 * @returns {Array} Effect arguments, possibly empty.
 */
function getSoxEffectsArgs(fields) {

	var effects = [];

	['speed', 'pitch', 'tempo', 'gain', 'delay'].forEach(function(name) {
		var value = fields[name];

		// Only `undefined` is skipped; any other value is passed through untouched.
		if (value !== undefined) {
			effects.push(name, value);
		}
	});

	['reverse', 'reverb'].forEach(function(name) {
		if (fields[name]) {
			effects.push(name);
		}
	});

	return fields.robot ? effects.concat(getSoxRobotVoiceArgs()) : effects;
}
277 | 
/**
 * Return the fixed effect chain used for the "robot" voice: an `overdrive`,
 * a series of `echo` effects and a final `gain`.
 *
 * @returns {string[]} A freshly built argument list on every call.
 */
function getSoxRobotVoiceArgs() {

	// One row per `echo` effect, holding its four parameters in order.
	var echoSettings = [
			['0.8', '0.8', '5', '0.7'],
			['0.8', '0.7', '6', '0.7'],
			['0.8', '0.7', '10', '0.7'],
			['0.8', '0.7', '12', '0.7'],
			['0.8', '0.88', '12', '0.7'],
			['0.8', '0.88', '30', '0.7'],
			['0.6', '0.6', '60', '0.7']
		],
		chain = ['overdrive', '10'];

	echoSettings.forEach(function(setting) {
		chain = chain.concat('echo', setting);
	});

	return chain.concat('gain', '8');
}
292 | 
// Public API of this module: only the voice dispatcher is exported.
module.exports = { getCmdWithArgs: getCmdWithArgs };
296 | 


--------------------------------------------------------------------------------