├── docker-compose.yml
├── download_model.py
├── LICENSE
├── api.py
├── Dockerfile
├── README.md
└── .gitignore


/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   cosyvoice_server:
 3 |     image: cosyvoice  # image tag; the README builds it with `docker build -t cosyvoice .`
 4 |     container_name: cosyvoice_server
 5 |     ports:
 6 |       - "8080:8080"  # host:container; the Dockerfile sets API_PORT=8080
 7 |     restart: always
 8 |     runtime: nvidia  # needs the NVIDIA Container Toolkit on the host (see README, "Usage")
 9 |     environment:
10 |       NVIDIA_DRIVER_CAPABILITIES: all
11 |       NVIDIA_VISIBLE_DEVICES: all


--------------------------------------------------------------------------------
/download_model.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- encoding: utf-8 -*-
3 | 
4 | from modelscope import snapshot_download
5 | 
6 | snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
7 | snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
8 | snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
9 | snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 小武Alan
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/api.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- encoding: utf-8 -*-
 3 | 
 4 | from fastapi import FastAPI, Form
 5 | import os
 6 | import uuid
 7 | from fastapi.responses import StreamingResponse
 8 | from cosyvoice.cli.cosyvoice import CosyVoice
 9 | from cosyvoice.utils.file_utils import load_wav
10 | import torchaudio
11 | from io import BytesIO
12 | 
13 | cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')  # model object is built once, when this module is imported
14 | print(cosyvoice.list_avaliable_spks())  # prints whatever the model reports; presumably the ids accepted as `spk` -- TODO confirm
15 | app = FastAPI()  # ASGI application that the /v1/tts route is registered on
16 | 
17 | # Text to Speech (TTS) API
18 | @app.post("/v1/tts")
19 | async def tts(
20 |     text: str = Form(None),
21 |     spk: str = Form("中文女"),
22 | ):
23 |     audio_buffer = BytesIO()
24 |     for output in cosyvoice.inference_sft(text, spk, stream=False):
25 |         torchaudio.save(audio_buffer, output['tts_speech'], 22050, format="wav")
26 |     audio_buffer.seek(0)
27 | 
28 |     return StreamingResponse(audio_buffer, media_type="audio/wav", headers={"Content-Disposition": "attachment; filename=output.wav"})
29 | 
30 | if __name__ == "__main__":
31 |     api_port = os.getenv("API_PORT", 8080)
32 |     api_host = os.getenv("API_HOST", "127.0.0.1")
33 |     import uvicorn
34 |     uvicorn.run(app, host=api_host, port=int(api_port))
35 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04 AS base
 2 | 
 3 | # System packages. DEBIAN_FRONTEND is set for this one command so that no
 4 | # package can stall the build on an interactive prompt; the apt lists are
 5 | # removed in the same layer so they do not end up in the image.
 6 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
 7 |     ffmpeg \
 8 |     tar \
 9 |     wget \
10 |     git \
11 |     bash \
12 |     vim \
13 |     && rm -rf /var/lib/apt/lists/*
14 | 
15 | # Install Miniconda (the PATH entry below relies on the /root/miniconda3 prefix)
16 | RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
17 |     && mkdir /root/.conda \
18 |     && bash Miniconda3-latest-Linux-x86_64.sh -b \
19 |     && rm -f Miniconda3-latest-Linux-x86_64.sh
20 | ENV PATH="/root/miniconda3/bin:${PATH}"
21 | 
22 | # Install requirements
23 | RUN conda config --add channels conda-forge && \
24 |     conda config --set channel_priority strict
25 | # NOTE(review): the clone below is not pinned to a tag or commit, so
26 | # requirements.txt follows upstream HEAD -- confirm python=3.8 still satisfies it.
27 | RUN conda create -y -n cosyvoice python=3.8
28 | ENV CONDA_DEFAULT_ENV=cosyvoice
29 | # The env is created under the Miniconda prefix, not /opt/conda. Its bin
30 | # directory is placed FIRST so that `python` resolves to the cosyvoice env
31 | # instead of the base interpreter in /root/miniconda3/bin.
32 | ENV PATH="/root/miniconda3/envs/cosyvoice/bin:${PATH}"
33 | RUN git clone https://github.com/FunAudioLLM/CosyVoice.git /root/CosyVoice
34 | WORKDIR /root/CosyVoice
35 | RUN git submodule update --init --recursive
36 | # `python -m pip` ties the install to the same interpreter that runs api.py.
37 | RUN python -m pip install -r requirements.txt
38 | 
39 | # Set environment variables
40 | # PYTHONPATH is relative, so it only resolves while the process runs from WORKDIR.
41 | ENV PYTHONPATH=third_party/Matcha-TTS
42 | ENV API_HOST=0.0.0.0
43 | ENV API_PORT=8080
44 | 
45 | # Run
46 | # Models are downloaded before api.py is copied, so editing api.py does not
47 | # invalidate the cached download layer.
48 | COPY download_model.py .
49 | RUN python download_model.py
50 | COPY api.py .
51 | CMD ["python", "api.py"]


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CosyVoiceDocker
 2 | 
 3 | This repository provides a Docker image for [CosyVoice](https://github.com/FunAudioLLM/CosyVoice), enabling you to deploy the CosyVoice text-to-speech (TTS) service within a Docker container.
 4 | 
 5 | ## Usage
 6 | 
 7 | To run this Docker container, you’ll need a machine with NVIDIA GPU support and the NVIDIA Container Toolkit installed. For detailed installation steps, please refer to the [NVIDIA Container Toolkit](https://notes.xiaowu.ai/%E5%BC%80%E5%8F%91%E7%AC%94%E8%AE%B0/AI/NVIDIA#%E5%AE%89%E8%A3%85+NVIDIA+Container+Toolkit) guide.
 8 | 
 9 | ### Build the Docker image
10 | 
11 | ```shell
12 | $ docker build -t cosyvoice .
13 | ```
14 | 
15 | ### Using docker command
16 | 
17 | ```shell
18 | $ docker run -d --name cosyvoice_server -p 8080:8080 \
19 |          --runtime=nvidia \
20 |          -e NVIDIA_DRIVER_CAPABILITIES=all \
21 |          -e NVIDIA_VISIBLE_DEVICES=all \
22 |          cosyvoice
23 | ```
24 | 
25 | ### Using docker compose
26 | 
27 | 1. Create a `docker-compose.yml` file:
28 | ```yaml
29 | services:
30 |   cosyvoice_server:
31 |     image: cosyvoice
32 |     container_name: cosyvoice_server
33 |     ports:
34 |       - "8080:8080"
35 |     restart: always
36 |     runtime: nvidia
37 |     environment:
38 |       NVIDIA_DRIVER_CAPABILITIES: all
39 |       NVIDIA_VISIBLE_DEVICES: all
40 | ```
41 | 2. Start the container:
42 | ```shell
43 | $ docker compose up -d
44 | ```
45 | 
46 | ## Testing
47 | 
48 | To test the API, use `curl`:
49 | 
50 | ```shell
51 | curl -X POST \
52 |   "http://127.0.0.1:8080/v1/tts" \
53 |   -F "text=你好，欢迎使用语音合成服务" \
54 |   -F "spk=中文女" \
55 |   --output output.wav
56 | ```


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 | 
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 | 
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 | 
121 | # SageMath parsed files
122 | *.sage.py
123 | 
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 | 
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 | 
137 | # Rope project settings
138 | .ropeproject
139 | 
140 | # mkdocs documentation
141 | /site
142 | 
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 | 
148 | # Pyre type checker
149 | .pyre/
150 | 
151 | # pytype static type analyzer
152 | .pytype/
153 | 
154 | # Cython debug symbols
155 | cython_debug/
156 | 
157 | # PyCharm
158 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
161 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 | 


--------------------------------------------------------------------------------