├── .github
    └── workflows
    │   └── docker-publish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.py
└── requirements.txt


/.github/workflows/docker-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow uses actions that are not certified by GitHub.
 2 | # They are provided by a third-party and are governed by
 3 | # separate terms of service, privacy policy, and support
 4 | # documentation.
 5 | 
 6 | # GitHub recommends pinning actions to a commit SHA.
 7 | # To get a newer version, you will need to update the SHA.
 8 | # You can also reference a tag or branch, but the action may change without warning.
 9 | 
10 | name: Publish Docker image
11 | 
12 | # Runs whenever a GitHub release is published.
13 | on:
14 |   release:
15 |     types: [published]
16 | 
17 | # The job only needs to check out the repository; Docker Hub access uses the
18 | # DOCKER_* secrets below, so GITHUB_TOKEN is restricted to read-only contents.
19 | permissions:
20 |   contents: read
21 | 
22 | jobs:
23 |   push_to_registry:
24 |     name: Push Docker image to Docker Hub
25 |     runs-on: ubuntu-latest
26 |     steps:
27 |       - name: Check out the repo
28 |         uses: actions/checkout@v4
29 | 
30 |       # Authenticates with the DOCKER_USERNAME / DOCKER_PASSWORD repository secrets.
31 |       - name: Log in to Docker Hub
32 |         uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
33 |         with:
34 |           username: ${{ secrets.DOCKER_USERNAME }}
35 |           password: ${{ secrets.DOCKER_PASSWORD }}
36 | 
37 |       # Produces the tags and labels for the image; later steps read them
38 |       # through steps.meta.outputs.
39 |       - name: Extract metadata (tags, labels) for Docker
40 |         id: meta
41 |         uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
42 |         with:
43 |           images: timhagel/melotts-api-server
44 | 
45 |       # Builds ./Dockerfile from the repository root and pushes the result
46 |       # with the tags and labels computed by the "meta" step.
47 |       - name: Build and push Docker image
48 |         uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
49 |         with:
50 |           context: .
51 |           file: ./Dockerfile
52 |           push: true
53 |           tags: ${{ steps.meta.outputs.tags }}
54 |           labels: ${{ steps.meta.outputs.labels }}
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Image for the MeloTTS REST API server; app.py serves on port 8080.
 2 | FROM python:3.9
 3 | 
 4 | # Install the pinned dependencies before copying the rest of the sources so
 5 | # this layer is only rebuilt when requirements.txt changes. --no-cache-dir
 6 | # keeps pip's download cache out of the image.
 7 | COPY requirements.txt .
 8 | RUN pip install --no-cache-dir -r requirements.txt
 9 | 
10 | # Fetch MeloTTS (latest commit only, the history is not needed) and install
11 | # it in editable mode from its checkout in /MeloTTS.
12 | RUN git clone --depth 1 https://github.com/myshell-ai/MeloTTS.git
13 | WORKDIR /MeloTTS
14 | RUN pip install --no-cache-dir -e .
15 | # Download the dictionary data for the unidic package.
16 | RUN python -m unidic download
17 | 
18 | # Copy the application into the filesystem root and start the API server.
19 | WORKDIR /
20 | COPY . .
21 | EXPOSE 8080
22 | CMD ["python", "app.py"]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Timothy Hagel
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # MeloTTS Docker API Server
  2 | 
  3 | A quick and easy way to access [MeloTTS](https://github.com/myshell-ai/MeloTTS) through REST API calls.
  4 | 
  5 | ## Build Image
  6 | These steps assume you have Docker installed and set up.
  7 | 
  8 | (This might take a while because MeloTTS is a large dependency.)
  9 | #### Local
 10 | 
 11 |     git clone git@github.com:timhagel/MeloTTS-Docker-API-Server.git
 12 |     cd MeloTTS-Docker-API-Server
 13 |     docker build -t timhagel/melotts-api-server .
 14 | 
 15 | #### Docker Hub
 16 | 
 17 |     docker pull timhagel/melotts-api-server
 18 |     
 19 | ## Languages and Speakers
 20 | 
 21 | #### Language
 22 | 
 23 | - EN - English
 24 | - ES - Spanish
 25 | - FR - French
 26 | - ZH - Chinese
 27 | - JP - Japanese
 28 | - KR - Korean
 29 | 
 30 | #### Speaker IDs
 31 | 
 32 | - EN-US - American English accent
 33 | - EN-BR - British English accent
 34 | - EN_INDIA - Indian English accent
 35 | - EN-AU - Australian English accent
 36 | - EN-Default - Default English accent
 37 | - **Notice!** Currently only the English speaker IDs listed above work; other speaker IDs return an error. This does not mean that other languages do not work!
 38 | 
 39 | ## Running
 40 | 
 41 | ### Run (CPU) (English)
 42 | 
 43 |     docker run --name melotts-server -p 8888:8080 -e DEFAULT_SPEED=1 -e DEFAULT_LANGUAGE=EN -e DEFAULT_SPEAKER_ID=EN-Default timhagel/melotts-api-server
 44 | 
 45 | ### Run (GPU) (English)
 46 |     
 47 |     docker run --name melotts-server -p 8888:8080 --gpus=all -e DEFAULT_SPEED=1 -e DEFAULT_LANGUAGE=EN -e DEFAULT_SPEAKER_ID=EN-Default timhagel/melotts-api-server
 48 | 
 49 | ## Call API
 50 | 
 51 | **localhost:8888/convert/tts**
 52 | 
 53 | ### Use Environment Defaults
 54 | Response: .wav
 55 | 
 56 | ###### Post body:
 57 | ```
 58 | {
 59 |     "text": "Put input here"
 60 | }
 61 | ```
 62 | 
 63 | ###### Example curl command:
 64 | ```sh
 65 | curl http://localhost:8888/convert/tts \
 66 | --header "Content-Type: application/json" \
 67 | -d '{ "text": "Put input here" }' \
 68 | --output "example.wav"
 69 | ```
 70 | 
 71 | ### Customize (Everything except for "text" is optional)
 72 | Response: .wav
 73 | 
 74 | ###### Post body:
 75 | ```
 76 | {
 77 |     "text": "input",
 78 |     "speed": "speed",
 79 |     "language": "language",
 80 |     "speaker_id": "speaker_id"
 81 | }
 82 | ```
 83 | 
 84 | ###### Example curl command:
 85 | ```sh
 86 | curl http://localhost:8888/convert/tts \
 87 | --header "Content-Type: application/json" \
 88 | -d '{
 89 |   "text": "Put input here",
 90 |   "speed": "0.5",
 91 |   "language": "EN",
 92 |   "speaker_id": "EN-BR"
 93 | }' \
 94 | --output "example.wav"
 95 | ```
 96 | 
 97 | ## Acknowledgement
 98 | 
 99 | This is just an API server for the awesome work of [MeloTTS](https://github.com/myshell-ai/MeloTTS) from [MyShell](https://github.com/myshell-ai).
100 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | import uvicorn
 4 | from dotenv import load_dotenv
 5 | from fastapi import BackgroundTasks, Body, Depends, FastAPI, HTTPException
 6 | from fastapi.responses import FileResponse
 7 | from melo.api import TTS
 8 | from pydantic import BaseModel
 9 | 
10 | load_dotenv()
11 | DEFAULT_SPEED = float(os.getenv("DEFAULT_SPEED"))
12 | DEFAULT_LANGUAGE = os.getenv("DEFAULT_LANGUAGE")
13 | DEFAULT_SPEAKER_ID = os.getenv("DEFAULT_SPEAKER_ID")
14 | device = "auto"  # Will automatically use GPU if available
15 | 
16 | 
17 | class TextModel(BaseModel):  # JSON body accepted by POST /convert/tts
18 |     text: str  # text to synthesize; the only field without a default
19 |     speed: float = DEFAULT_SPEED  # forwarded to tts_to_file as speed=
20 |     language: str = DEFAULT_LANGUAGE  # forwarded to TTS(language=...)
21 |     speaker_id: str = DEFAULT_SPEAKER_ID  # key looked up in the model's spk2id table
22 | 
23 | 
24 | app = FastAPI()  # application object; routes attach to it and uvicorn.run serves it
25 | 
26 | 
27 | def get_tts_model(body: TextModel):
28 |     return TTS(language=body.language, device=device)
29 | 
30 | 
31 | @app.post("/convert/tts")
32 | async def create_upload_file(
33 |     body: TextModel = Body(...), model: TTS = Depends(get_tts_model)
34 | ):
35 |     speaker_ids = model.hps.data.spk2id
36 | 
37 |     print(os.path.basename(body.text))
38 | 
39 |     # Use a temporary file
40 |     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
41 |         output_path = tmp.name
42 |         model.tts_to_file(
43 |             body.text, speaker_ids[body.speaker_id], output_path, speed=body.speed
44 |         )
45 | 
46 |         # Return the audio file, ensure the file is not deleted until after the response is sent
47 |         response = FileResponse(
48 |             output_path, media_type="audio/mpeg", filename=os.path.basename(output_path)
49 |         )
50 | 
51 |     return response
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     uvicorn.run(app, host="0.0.0.0", port=8080)
56 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | absl-py==2.1.0
  2 | aiofiles==23.2.1
  3 | altair==5.2.0
  4 | annotated-types==0.6.0
  5 | anyascii==0.3.2
  6 | anyio==4.3.0
  7 | attrs==23.2.0
  8 | audioread==3.0.1
  9 | Babel==2.14.0
 10 | boto3==1.34.64
 11 | botocore==1.34.64
 12 | cached_path==1.6.2
 13 | cachetools==5.3.3
 14 | certifi==2024.2.2
 15 | cffi==1.16.0
 16 | charset-normalizer==3.3.2
 17 | click==8.1.7
 18 | cn2an==0.5.22
 19 | colorama==0.4.6
 20 | contourpy==1.2.0
 21 | cycler==0.12.1
 22 | dateparser==1.1.8
 23 | decorator==5.1.1
 24 | Deprecated==1.2.14
 25 | Distance==0.1.3
 26 | dnspython==2.6.1
 27 | docopt==0.6.2
 28 | email_validator==2.1.1
 29 | eng-to-ipa==0.0.2
 30 | exceptiongroup==1.2.0
 31 | fastapi==0.110.0
 32 | ffmpy==0.3.2
 33 | filelock==3.13.1
 34 | fonttools==4.50.0
 35 | fsspec==2024.3.0
 36 | fugashi==1.3.0
 37 | g2p-en==2.1.0
 38 | g2pkk==0.1.2
 39 | google-api-core==2.17.1
 40 | google-auth==2.28.2
 41 | google-cloud-core==2.4.1
 42 | google-cloud-storage==2.15.0
 43 | google-crc32c==1.5.0
 44 | google-resumable-media==2.7.0
 45 | googleapis-common-protos==1.63.0
 46 | gradio==4.21.0
 47 | gradio_client==0.12.0
 48 | grpcio==1.62.1
 49 | gruut==2.2.3
 50 | gruut-ipa==0.13.0
 51 | gruut-lang-de==2.0.0
 52 | gruut-lang-en==2.0.0
 53 | gruut-lang-es==2.0.0
 54 | gruut-lang-fr==2.0.2
 55 | h11==0.14.0
 56 | httpcore==1.0.4
 57 | httptools==0.6.1
 58 | httpx==0.27.0
 59 | huggingface-hub==0.21.4
 60 | idna==3.6
 61 | importlib_metadata==7.0.2
 62 | importlib_resources==6.3.1
 63 | inflect==7.0.0
 64 | itsdangerous==2.1.2
 65 | jaconv==0.3.4
 66 | jamo==0.4.1
 67 | jieba==0.42.1
 68 | Jinja2==3.1.3
 69 | jmespath==1.0.1
 70 | joblib==1.3.2
 71 | jsonlines==1.2.0
 72 | jsonschema==4.21.1
 73 | jsonschema-specifications==2023.12.1
 74 | kiwisolver==1.4.5
 75 | langid==1.1.6
 76 | librosa==0.9.1
 77 | llvmlite==0.42.0
 78 | loguru==0.7.2
 79 | Markdown==3.6
 80 | markdown-it-py==3.0.0
 81 | MarkupSafe==2.1.5
 82 | matplotlib==3.8.3
 83 | mdurl==0.1.2
 84 | mecab-python3==1.0.5
 85 | networkx==2.8.8
 86 | nltk==3.8.1
 87 | num2words==0.5.12
 88 | numba==0.59.0
 89 | numpy==1.26.4
 90 | nvidia-cublas-cu11==11.10.3.66
 91 | nvidia-cuda-nvrtc-cu11==11.7.99
 92 | nvidia-cuda-runtime-cu11==11.7.99
 93 | nvidia-cudnn-cu11==8.5.0.96
 94 | orjson==3.9.15
 95 | packaging==24.0
 96 | pandas==2.2.1
 97 | pillow==10.2.0
 98 | pip==23.0.1
 99 | plac==1.4.3
100 | platformdirs==4.2.0
101 | pooch==1.8.1
102 | proces==0.1.7
103 | protobuf==4.25.3
104 | pyasn1==0.5.1
105 | pyasn1-modules==0.3.0
106 | pycparser==2.21
107 | pydantic==2.6.4
108 | pydantic_core==2.16.3
109 | pydantic-extra-types==2.6.0
110 | pydantic-settings==2.2.1
111 | pydub==0.25.1
112 | Pygments==2.17.2
113 | pykakasi==2.2.1
114 | pyparsing==3.1.2
115 | pypinyin==0.50.0
116 | python-crfsuite==0.9.10
117 | python-dateutil==2.9.0.post0
118 | python-dotenv==1.0.0
119 | python-multipart==0.0.9
120 | pytz==2024.1
121 | PyYAML==6.0.1
122 | referencing==0.34.0
123 | regex==2023.12.25
124 | requests==2.31.0
125 | resampy==0.4.3
126 | rich==13.7.1
127 | rpds-py==0.18.0
128 | rsa==4.9
129 | ruff==0.3.3
130 | s3transfer==0.10.1
131 | scikit-learn==1.4.1.post1
132 | scipy==1.12.0
133 | semantic-version==2.10.0
134 | setuptools==58.1.0
135 | shellingham==1.5.4
136 | six==1.16.0
137 | sniffio==1.3.1
138 | soundfile==0.12.1
139 | starlette==0.36.3
140 | tensorboard==2.16.2
141 | tensorboard-data-server==0.7.2
142 | threadpoolctl==3.3.0
143 | tokenizers==0.13.3
144 | tomlkit==0.12.0
145 | toolz==0.12.1
146 | torch==1.13.1
147 | torchaudio==0.13.1
148 | tqdm==4.66.2
149 | transformers==4.27.4
150 | txtsplit==1.0.0
151 | typer==0.9.0
152 | typing_extensions==4.10.0
153 | tzdata==2024.1
154 | tzlocal==5.2
155 | ujson==5.9.0
156 | Unidecode==1.3.7
157 | unidic==1.1.0
158 | unidic-lite==1.0.8
159 | urllib3==1.26.18
160 | uvicorn==0.28.0
161 | uvloop==0.19.0
162 | wasabi==0.10.1
163 | watchfiles==0.21.0
164 | websockets==11.0.3
165 | Werkzeug==3.0.1
166 | wheel==0.43.0
167 | wrapt==1.16.0
168 | zipp==3.18.1


--------------------------------------------------------------------------------