├── .github └── workflows │ └── docker-publish.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── app.py └── requirements.txt /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 8 | # You can also reference a tag or branch, but the action may change without warning. 9 | 10 | name: Publish Docker image 11 | 12 | on: 13 | release: 14 | types: [published] 15 | 16 | jobs: 17 | push_to_registry: 18 | name: Push Docker image to Docker Hub 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Check out the repo 22 | uses: actions/checkout@v4 23 | 24 | - name: Log in to Docker Hub 25 | uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a 26 | with: 27 | username: ${{ secrets.DOCKER_USERNAME }} 28 | password: ${{ secrets.DOCKER_PASSWORD }} 29 | 30 | - name: Extract metadata (tags, labels) for Docker 31 | id: meta 32 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 33 | with: 34 | images: timhagel/melotts-api-server 35 | 36 | - name: Build and push Docker image 37 | uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 38 | with: 39 | context: . 
40 | file: ./Dockerfile 41 | push: true 42 | tags: ${{ steps.meta.outputs.tags }} 43 | labels: ${{ steps.meta.outputs.labels }} 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include 
Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | COPY requirements.txt . 3 | RUN pip install -r requirements.txt 4 | RUN git clone https://github.com/myshell-ai/MeloTTS.git 5 | WORKDIR /MeloTTS 6 | RUN pip install --no-cache-dir -e . 7 | RUN python -m unidic download 8 | WORKDIR / 9 | COPY . . 
10 | EXPOSE 8080 11 | CMD ["python", "app.py"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Timothy Hagel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MeloTTS Docker API Server 2 | 3 | A quick, easy way to access [MeloTTS](https://github.com/myshell-ai/MeloTTS) through REST API calls. 4 | 5 | ## Build Image 6 | This assumes you have Docker installed and set up. 
7 | 8 | (This might take a while because MeloTTS is a large dependency.) 9 | #### Local 10 | 11 | git clone git@github.com:timhagel/MeloTTS-Docker-API-Server.git 12 | cd MeloTTS-Docker-API-Server 13 | docker build -t timhagel/melotts-api-server . 14 | 15 | #### Docker Hub 16 | 17 | docker pull timhagel/melotts-api-server 18 | 19 | ## Languages and Speakers 20 | 21 | #### Language 22 | 23 | - EN - English 24 | - ES - Spanish 25 | - FR - French 26 | - ZH - Chinese 27 | - JP - Japanese 28 | - KR - Korean 29 | 30 | #### Speaker IDs 31 | 32 | - EN-US - American English accent 33 | - EN-BR - British English accent 34 | - EN_INDIA - Indian English accent 35 | - EN-AU - Australian English accent 36 | - EN-Default - Default English accent 37 | - **Notice!** Currently only the English accents work; other accents return an error. This does not mean that other languages do not work! 38 | 39 | ## Running 40 | 41 | ### Run (CPU) (English) 42 | 43 | docker run --name melotts-server -p 8888:8080 -e DEFAULT_SPEED=1 -e DEFAULT_LANGUAGE=EN -e DEFAULT_SPEAKER_ID=EN-Default timhagel/melotts-api-server 44 | 45 | ### Run (GPU) (English) 46 | 47 | docker run --name melotts-server -p 8888:8080 --gpus=all -e DEFAULT_SPEED=1 -e DEFAULT_LANGUAGE=EN -e DEFAULT_SPEAKER_ID=EN-Default timhagel/melotts-api-server 48 | 49 | ## Call API 50 | 51 | **localhost:8888/convert/tts** 52 | 53 | ### Use Environment Defaults 54 | Response: .wav 55 | 56 | ###### Post body: 57 | ``` 58 | { 59 | "text": "Put input here" 60 | } 61 | ``` 62 | 63 | ###### Example curl command: 64 | ```sh 65 | curl http://localhost:8888/convert/tts \ 66 | --header "Content-Type: application/json" \ 67 | -d '{ "text": "Put input here" }' \ 68 | --output "example.wav" 69 | ``` 70 | 71 | ### Customize (Everything except for "text" is optional) 72 | Response: .wav 73 | 74 | ###### Post body: 75 | ``` 76 | { 77 | "text": "input", 78 | "speed": "speed", 79 | "language": "language", 80 | "speaker_id": "speaker_id" 81 | } 82 | ``` 
83 | 84 | ###### Example curl command: 85 | ```sh 86 | curl http://localhost:8888/convert/tts \ 87 | --header "Content-Type: application/json" \ 88 | -d '{ 89 | "text": "Put input here", 90 | "speed": "0.5", 91 | "language": "EN", 92 | "speaker_id": "EN-BR" 93 | }' \ 94 | --output "example.wav" 95 | ``` 96 | 97 | ## Acknowledgement 98 | 99 | This is just an API server for the awesome work of [MeloTTS](https://github.com/myshell-ai/MeloTTS) from [MyShell](https://github.com/myshell-ai) 100 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uvicorn 3 | from fastapi import FastAPI, Body, Depends 4 | from pydantic import BaseModel 5 | from fastapi.responses import FileResponse 6 | from melo.api import TTS 7 | from dotenv import load_dotenv 8 | import tempfile 9 | 10 | load_dotenv() 11 | DEFAULT_SPEED = float(os.getenv("DEFAULT_SPEED")) 12 | DEFAULT_LANGUAGE = os.getenv("DEFAULT_LANGUAGE") 13 | DEFAULT_SPEAKER_ID = os.getenv("DEFAULT_SPEAKER_ID") 14 | device = "auto" # Will automatically use GPU if available 15 | 16 | 17 | class TextModel(BaseModel): 18 | text: str 19 | speed: float = DEFAULT_SPEED 20 | language: str = DEFAULT_LANGUAGE 21 | speaker_id: str = DEFAULT_SPEAKER_ID 22 | 23 | 24 | app = FastAPI() 25 | 26 | 27 | def get_tts_model(body: TextModel): 28 | return TTS(language=body.language, device=device) 29 | 30 | 31 | @app.post("/convert/tts") 32 | async def create_upload_file( 33 | body: TextModel = Body(...), model: TTS = Depends(get_tts_model) 34 | ): 35 | speaker_ids = model.hps.data.spk2id 36 | 37 | print(os.path.basename(body.text)) 38 | 39 | # Use a temporary file 40 | with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp: 41 | output_path = tmp.name 42 | model.tts_to_file( 43 | body.text, speaker_ids[body.speaker_id], output_path, speed=body.speed 44 | ) 45 | 46 | # Return the audio file, ensure 
the file is not deleted until after the response is sent 47 | response = FileResponse( 48 | output_path, media_type="audio/mpeg", filename=os.path.basename(output_path) 49 | ) 50 | 51 | return response 52 | 53 | 54 | if __name__ == "__main__": 55 | uvicorn.run(app, host="0.0.0.0", port=8080) 56 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.1.0 2 | aiofiles==23.2.1 3 | altair==5.2.0 4 | annotated-types==0.6.0 5 | anyascii==0.3.2 6 | anyio==4.3.0 7 | attrs==23.2.0 8 | audioread==3.0.1 9 | Babel==2.14.0 10 | boto3==1.34.64 11 | botocore==1.34.64 12 | cached_path==1.6.2 13 | cachetools==5.3.3 14 | certifi==2024.2.2 15 | cffi==1.16.0 16 | charset-normalizer==3.3.2 17 | click==8.1.7 18 | cn2an==0.5.22 19 | colorama==0.4.6 20 | contourpy==1.2.0 21 | cycler==0.12.1 22 | dateparser==1.1.8 23 | decorator==5.1.1 24 | Deprecated==1.2.14 25 | Distance==0.1.3 26 | dnspython==2.6.1 27 | docopt==0.6.2 28 | email_validator==2.1.1 29 | eng-to-ipa==0.0.2 30 | exceptiongroup==1.2.0 31 | fastapi==0.110.0 32 | ffmpy==0.3.2 33 | filelock==3.13.1 34 | fonttools==4.50.0 35 | fsspec==2024.3.0 36 | fugashi==1.3.0 37 | g2p-en==2.1.0 38 | g2pkk==0.1.2 39 | google-api-core==2.17.1 40 | google-auth==2.28.2 41 | google-cloud-core==2.4.1 42 | google-cloud-storage==2.15.0 43 | google-crc32c==1.5.0 44 | google-resumable-media==2.7.0 45 | googleapis-common-protos==1.63.0 46 | gradio==4.21.0 47 | gradio_client==0.12.0 48 | grpcio==1.62.1 49 | gruut==2.2.3 50 | gruut-ipa==0.13.0 51 | gruut-lang-de==2.0.0 52 | gruut-lang-en==2.0.0 53 | gruut-lang-es==2.0.0 54 | gruut-lang-fr==2.0.2 55 | h11==0.14.0 56 | httpcore==1.0.4 57 | httptools==0.6.1 58 | httpx==0.27.0 59 | huggingface-hub==0.21.4 60 | idna==3.6 61 | importlib_metadata==7.0.2 62 | importlib_resources==6.3.1 63 | inflect==7.0.0 64 | itsdangerous==2.1.2 65 | jaconv==0.3.4 66 | jamo==0.4.1 67 | 
jieba==0.42.1 68 | Jinja2==3.1.3 69 | jmespath==1.0.1 70 | joblib==1.3.2 71 | jsonlines==1.2.0 72 | jsonschema==4.21.1 73 | jsonschema-specifications==2023.12.1 74 | kiwisolver==1.4.5 75 | langid==1.1.6 76 | librosa==0.9.1 77 | llvmlite==0.42.0 78 | loguru==0.7.2 79 | Markdown==3.6 80 | markdown-it-py==3.0.0 81 | MarkupSafe==2.1.5 82 | matplotlib==3.8.3 83 | mdurl==0.1.2 84 | mecab-python3==1.0.5 85 | networkx==2.8.8 86 | nltk==3.8.1 87 | num2words==0.5.12 88 | numba==0.59.0 89 | numpy==1.26.4 90 | nvidia-cublas-cu11==11.10.3.66 91 | nvidia-cuda-nvrtc-cu11==11.7.99 92 | nvidia-cuda-runtime-cu11==11.7.99 93 | nvidia-cudnn-cu11==8.5.0.96 94 | orjson==3.9.15 95 | packaging==24.0 96 | pandas==2.2.1 97 | pillow==10.2.0 98 | pip==23.0.1 99 | plac==1.4.3 100 | platformdirs==4.2.0 101 | pooch==1.8.1 102 | proces==0.1.7 103 | protobuf==4.25.3 104 | pyasn1==0.5.1 105 | pyasn1-modules==0.3.0 106 | pycparser==2.21 107 | pydantic==2.6.4 108 | pydantic_core==2.16.3 109 | pydantic-extra-types==2.6.0 110 | pydantic-settings==2.2.1 111 | pydub==0.25.1 112 | Pygments==2.17.2 113 | pykakasi==2.2.1 114 | pyparsing==3.1.2 115 | pypinyin==0.50.0 116 | python-crfsuite==0.9.10 117 | python-dateutil==2.9.0.post0 118 | python-dotenv==1.0.0 119 | python-multipart==0.0.9 120 | pytz==2024.1 121 | PyYAML==6.0.1 122 | referencing==0.34.0 123 | regex==2023.12.25 124 | requests==2.31.0 125 | resampy==0.4.3 126 | rich==13.7.1 127 | rpds-py==0.18.0 128 | rsa==4.9 129 | ruff==0.3.3 130 | s3transfer==0.10.1 131 | scikit-learn==1.4.1.post1 132 | scipy==1.12.0 133 | semantic-version==2.10.0 134 | setuptools==58.1.0 135 | shellingham==1.5.4 136 | six==1.16.0 137 | sniffio==1.3.1 138 | soundfile==0.12.1 139 | starlette==0.36.3 140 | tensorboard==2.16.2 141 | tensorboard-data-server==0.7.2 142 | threadpoolctl==3.3.0 143 | tokenizers==0.13.3 144 | tomlkit==0.12.0 145 | toolz==0.12.1 146 | torch==1.13.1 147 | torchaudio==0.13.1 148 | tqdm==4.66.2 149 | transformers==4.27.4 150 | txtsplit==1.0.0 151 | 
typer==0.9.0 152 | typing_extensions==4.10.0 153 | tzdata==2024.1 154 | tzlocal==5.2 155 | ujson==5.9.0 156 | Unidecode==1.3.7 157 | unidic==1.1.0 158 | unidic-lite==1.0.8 159 | urllib3==1.26.18 160 | uvicorn==0.28.0 161 | uvloop==0.19.0 162 | wasabi==0.10.1 163 | watchfiles==0.21.0 164 | websockets==11.0.3 165 | Werkzeug==3.0.1 166 | wheel==0.43.0 167 | wrapt==1.16.0 168 | zipp==3.18.1 --------------------------------------------------------------------------------