├── .github
    └── workflows
    │   └── build-and-push-to-ghcr.yml
├── .gitignore
├── LICENSE
├── README.md
├── demo.py
├── server
    ├── Dockerfile
    ├── Dockerfile.cpu
    ├── Dockerfile.cuda121
    ├── main.py
    ├── requirements.txt
    └── requirements_cpu.txt
└── test
    ├── default_speaker.json
    ├── requirements.txt
    └── test_streaming.py


/.github/workflows/build-and-push-to-ghcr.yml:
--------------------------------------------------------------------------------
  1 | name: Build and push to GHCR
  2 | on:  # runs on pushes to main and on every pull request
  3 |   push:
  4 |     branches: [main]
  5 |   pull_request:
  6 | jobs:  # one job per image variant (CUDA 11.8, CUDA 12.1, CPU)
  7 |   build-and-push-to-ghcr-cuda118:  # CUDA 11.8 image, built from server/Dockerfile
  8 |     runs-on: ubuntu-22.04
  9 |     steps:
 10 |       -
 11 |         name: Checkout
 12 |         uses: actions/checkout@v4
 13 | 
 14 |       -
 15 |         name: Set up Docker Buildx
 16 |         uses: docker/setup-buildx-action@v3
 17 | 
 18 |       - name: 'Login to GitHub Container Registry'  # token goes through stdin, never through the docker argv
 19 |         run: |
 20 |           set -xe
 21 |           echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io --username ${{ github.actor }} --password-stdin
 22 | 
 23 |       - name: 'Remove cache'  # deletes preinstalled toolchains; presumably to free runner disk for the build
 24 |         run: |
 25 |           sudo rm -rf /usr/share/dotnet
 26 |           sudo rm -rf /opt/ghc
 27 |           sudo rm -rf "/usr/local/share/boost"
 28 |           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
 29 | 
 30 |       - name: Build only for PR Cuda 11.8
 31 |         if: github.ref != 'refs/heads/main'
 32 |         uses: docker/build-push-action@v5
 33 |         with:
 34 |           context: "{{defaultContext}}:server"
 35 |           file: Dockerfile
 36 |           push: false # Do not push image for PR
 37 |           cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }}
 38 |           cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-${{ github.event.number }}
 39 | 
 40 |       - name: Build and Push image Cuda 11.8
 41 |         if: github.ref == 'refs/heads/main'
 42 |         uses: docker/build-push-action@v5
 43 |         with:
 44 |           context: "{{defaultContext}}:server"
 45 |           file: Dockerfile
 46 |           push: true # Push if merged
 47 |           cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest
 48 |           cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest
 49 |           tags: ghcr.io/coqui-ai/xtts-streaming-server:latest, ghcr.io/coqui-ai/xtts-streaming-server:main-${{ github.sha }}
 50 |           #build-args:
 51 | 
 52 |   build-and-push-to-ghcr-cuda121:  # CUDA 12.1 image, built from server/Dockerfile.cuda121
 53 |     runs-on: ubuntu-22.04
 54 |     steps:
 55 |       -
 56 |         name: Checkout
 57 |         uses: actions/checkout@v4
 58 | 
 59 |       -
 60 |         name: Set up Docker Buildx
 61 |         uses: docker/setup-buildx-action@v3
 62 | 
 63 |       - name: 'Login to GitHub Container Registry'  # token goes through stdin, never through the docker argv
 64 |         run: |
 65 |           set -xe
 66 |           echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io --username ${{ github.actor }} --password-stdin
 67 | 
 68 |       - name: 'Remove cache'  # deletes preinstalled toolchains; presumably to free runner disk for the build
 69 |         run: |
 70 |           sudo rm -rf /usr/share/dotnet
 71 |           sudo rm -rf /opt/ghc
 72 |           sudo rm -rf "/usr/local/share/boost"
 73 |           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
 74 | 
 75 |       - name: Build only for PR cuda 12.1
 76 |         if: github.ref != 'refs/heads/main'
 77 |         uses: docker/build-push-action@v5
 78 |         with:
 79 |           context: "{{defaultContext}}:server"
 80 |           file: Dockerfile.cuda121
 81 |           push: false # Do not push image for PR
 82 |           cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
 83 |           cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cuda121-${{ github.event.number }}
 84 | 
 85 |       - name: Build and Push image cuda 12.1
 86 |         if: github.ref == 'refs/heads/main'
 87 |         uses: docker/build-push-action@v5
 88 |         with:
 89 |           context: "{{defaultContext}}:server"
 90 |           file: Dockerfile.cuda121
 91 |           push: true # Push if merged
 92 |           cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
 93 |           cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cuda121
 94 |           tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121, ghcr.io/coqui-ai/xtts-streaming-server:main-cuda121-${{ github.sha }}
 95 |           #build-args:
 96 |   build-and-push-to-ghcr-cpu:  # CPU-only image, built from server/Dockerfile.cpu
 97 |     runs-on: ubuntu-22.04
 98 |     steps:
 99 |       -
100 |         name: Checkout
101 |         uses: actions/checkout@v4
102 | 
103 |       -
104 |         name: Set up Docker Buildx
105 |         uses: docker/setup-buildx-action@v3
106 | 
107 |       - name: 'Login to GitHub Container Registry'  # token goes through stdin, never through the docker argv
108 |         run: |
109 |           set -xe
110 |           echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io --username ${{ github.actor }} --password-stdin
111 | 
112 |       - name: 'Remove cache'  # deletes preinstalled toolchains; presumably to free runner disk for the build
113 |         run: |
114 |           sudo rm -rf /usr/share/dotnet
115 |           sudo rm -rf /opt/ghc
116 |           sudo rm -rf "/usr/local/share/boost"
117 |           sudo rm -rf "$AGENT_TOOLSDIRECTORY"
118 | 
119 |       - name: Build only for PR CPU  # reads and writes the same cache-pr-cpu-<PR number> ref
120 |         if: github.ref != 'refs/heads/main'
121 |         uses: docker/build-push-action@v5
122 |         with:
123 |           context: "{{defaultContext}}:server"
124 |           file: Dockerfile.cpu
125 |           push: false # Do not push image for PR
126 |           cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu; type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cpu-${{ github.event.number }}
127 |           cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-pr-cpu-${{ github.event.number }}
128 | 
129 |       - name: Build and Push image CPU
130 |         if: github.ref == 'refs/heads/main'
131 |         uses: docker/build-push-action@v5
132 |         with:
133 |           context: "{{defaultContext}}:server"
134 |           file: Dockerfile.cpu
135 |           push: true # Push if merged
136 |           cache-from: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu
137 |           cache-to: type=registry,ref=ghcr.io/coqui-ai/xtts-streaming-server:cache-latest-cpu
138 |           tags: ghcr.io/coqui-ai/xtts-streaming-server:latest-cpu, ghcr.io/coqui-ai/xtts-streaming-server:main-cpu-${{ github.sha }}
139 |           #build-args:
140 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | demo_outputs


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Mozilla Public License Version 2.0
  2 | ==================================
  3 | 
  4 | 1. Definitions
  5 | --------------
  6 | 
  7 | 1.1. "Contributor"
  8 |     means each individual or legal entity that creates, contributes to
  9 |     the creation of, or owns Covered Software.
 10 | 
 11 | 1.2. "Contributor Version"
 12 |     means the combination of the Contributions of others (if any) used
 13 |     by a Contributor and that particular Contributor's Contribution.
 14 | 
 15 | 1.3. "Contribution"
 16 |     means Covered Software of a particular Contributor.
 17 | 
 18 | 1.4. "Covered Software"
 19 |     means Source Code Form to which the initial Contributor has attached
 20 |     the notice in Exhibit A, the Executable Form of such Source Code
 21 |     Form, and Modifications of such Source Code Form, in each case
 22 |     including portions thereof.
 23 | 
 24 | 1.5. "Incompatible With Secondary Licenses"
 25 |     means
 26 | 
 27 |     (a) that the initial Contributor has attached the notice described
 28 |         in Exhibit B to the Covered Software; or
 29 | 
 30 |     (b) that the Covered Software was made available under the terms of
 31 |         version 1.1 or earlier of the License, but not also under the
 32 |         terms of a Secondary License.
 33 | 
 34 | 1.6. "Executable Form"
 35 |     means any form of the work other than Source Code Form.
 36 | 
 37 | 1.7. "Larger Work"
 38 |     means a work that combines Covered Software with other material, in 
 39 |     a separate file or files, that is not Covered Software.
 40 | 
 41 | 1.8. "License"
 42 |     means this document.
 43 | 
 44 | 1.9. "Licensable"
 45 |     means having the right to grant, to the maximum extent possible,
 46 |     whether at the time of the initial grant or subsequently, any and
 47 |     all of the rights conveyed by this License.
 48 | 
 49 | 1.10. "Modifications"
 50 |     means any of the following:
 51 | 
 52 |     (a) any file in Source Code Form that results from an addition to,
 53 |         deletion from, or modification of the contents of Covered
 54 |         Software; or
 55 | 
 56 |     (b) any new file in Source Code Form that contains any Covered
 57 |         Software.
 58 | 
 59 | 1.11. "Patent Claims" of a Contributor
 60 |     means any patent claim(s), including without limitation, method,
 61 |     process, and apparatus claims, in any patent Licensable by such
 62 |     Contributor that would be infringed, but for the grant of the
 63 |     License, by the making, using, selling, offering for sale, having
 64 |     made, import, or transfer of either its Contributions or its
 65 |     Contributor Version.
 66 | 
 67 | 1.12. "Secondary License"
 68 |     means either the GNU General Public License, Version 2.0, the GNU
 69 |     Lesser General Public License, Version 2.1, the GNU Affero General
 70 |     Public License, Version 3.0, or any later versions of those
 71 |     licenses.
 72 | 
 73 | 1.13. "Source Code Form"
 74 |     means the form of the work preferred for making modifications.
 75 | 
 76 | 1.14. "You" (or "Your")
 77 |     means an individual or a legal entity exercising rights under this
 78 |     License. For legal entities, "You" includes any entity that
 79 |     controls, is controlled by, or is under common control with You. For
 80 |     purposes of this definition, "control" means (a) the power, direct
 81 |     or indirect, to cause the direction or management of such entity,
 82 |     whether by contract or otherwise, or (b) ownership of more than
 83 |     fifty percent (50%) of the outstanding shares or beneficial
 84 |     ownership of such entity.
 85 | 
 86 | 2. License Grants and Conditions
 87 | --------------------------------
 88 | 
 89 | 2.1. Grants
 90 | 
 91 | Each Contributor hereby grants You a world-wide, royalty-free,
 92 | non-exclusive license:
 93 | 
 94 | (a) under intellectual property rights (other than patent or trademark)
 95 |     Licensable by such Contributor to use, reproduce, make available,
 96 |     modify, display, perform, distribute, and otherwise exploit its
 97 |     Contributions, either on an unmodified basis, with Modifications, or
 98 |     as part of a Larger Work; and
 99 | 
100 | (b) under Patent Claims of such Contributor to make, use, sell, offer
101 |     for sale, have made, import, and otherwise transfer either its
102 |     Contributions or its Contributor Version.
103 | 
104 | 2.2. Effective Date
105 | 
106 | The licenses granted in Section 2.1 with respect to any Contribution
107 | become effective for each Contribution on the date the Contributor first
108 | distributes such Contribution.
109 | 
110 | 2.3. Limitations on Grant Scope
111 | 
112 | The licenses granted in this Section 2 are the only rights granted under
113 | this License. No additional rights or licenses will be implied from the
114 | distribution or licensing of Covered Software under this License.
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
116 | Contributor:
117 | 
118 | (a) for any code that a Contributor has removed from Covered Software;
119 |     or
120 | 
121 | (b) for infringements caused by: (i) Your and any other third party's
122 |     modifications of Covered Software, or (ii) the combination of its
123 |     Contributions with other software (except as part of its Contributor
124 |     Version); or
125 | 
126 | (c) under Patent Claims infringed by Covered Software in the absence of
127 |     its Contributions.
128 | 
129 | This License does not grant any rights in the trademarks, service marks,
130 | or logos of any Contributor (except as may be necessary to comply with
131 | the notice requirements in Section 3.4).
132 | 
133 | 2.4. Subsequent Licenses
134 | 
135 | No Contributor makes additional grants as a result of Your choice to
136 | distribute the Covered Software under a subsequent version of this
137 | License (see Section 10.2) or under the terms of a Secondary License (if
138 | permitted under the terms of Section 3.3).
139 | 
140 | 2.5. Representation
141 | 
142 | Each Contributor represents that the Contributor believes its
143 | Contributions are its original creation(s) or it has sufficient rights
144 | to grant the rights to its Contributions conveyed by this License.
145 | 
146 | 2.6. Fair Use
147 | 
148 | This License is not intended to limit any rights You have under
149 | applicable copyright doctrines of fair use, fair dealing, or other
150 | equivalents.
151 | 
152 | 2.7. Conditions
153 | 
154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155 | in Section 2.1.
156 | 
157 | 3. Responsibilities
158 | -------------------
159 | 
160 | 3.1. Distribution of Source Form
161 | 
162 | All distribution of Covered Software in Source Code Form, including any
163 | Modifications that You create or to which You contribute, must be under
164 | the terms of this License. You must inform recipients that the Source
165 | Code Form of the Covered Software is governed by the terms of this
166 | License, and how they can obtain a copy of this License. You may not
167 | attempt to alter or restrict the recipients' rights in the Source Code
168 | Form.
169 | 
170 | 3.2. Distribution of Executable Form
171 | 
172 | If You distribute Covered Software in Executable Form then:
173 | 
174 | (a) such Covered Software must also be made available in Source Code
175 |     Form, as described in Section 3.1, and You must inform recipients of
176 |     the Executable Form how they can obtain a copy of such Source Code
177 |     Form by reasonable means in a timely manner, at a charge no more
178 |     than the cost of distribution to the recipient; and
179 | 
180 | (b) You may distribute such Executable Form under the terms of this
181 |     License, or sublicense it under different terms, provided that the
182 |     license for the Executable Form does not attempt to limit or alter
183 |     the recipients' rights in the Source Code Form under this License.
184 | 
185 | 3.3. Distribution of a Larger Work
186 | 
187 | You may create and distribute a Larger Work under terms of Your choice,
188 | provided that You also comply with the requirements of this License for
189 | the Covered Software. If the Larger Work is a combination of Covered
190 | Software with a work governed by one or more Secondary Licenses, and the
191 | Covered Software is not Incompatible With Secondary Licenses, this
192 | License permits You to additionally distribute such Covered Software
193 | under the terms of such Secondary License(s), so that the recipient of
194 | the Larger Work may, at their option, further distribute the Covered
195 | Software under the terms of either this License or such Secondary
196 | License(s).
197 | 
198 | 3.4. Notices
199 | 
200 | You may not remove or alter the substance of any license notices
201 | (including copyright notices, patent notices, disclaimers of warranty,
202 | or limitations of liability) contained within the Source Code Form of
203 | the Covered Software, except that You may alter any license notices to
204 | the extent required to remedy known factual inaccuracies.
205 | 
206 | 3.5. Application of Additional Terms
207 | 
208 | You may choose to offer, and to charge a fee for, warranty, support,
209 | indemnity or liability obligations to one or more recipients of Covered
210 | Software. However, You may do so only on Your own behalf, and not on
211 | behalf of any Contributor. You must make it absolutely clear that any
212 | such warranty, support, indemnity, or liability obligation is offered by
213 | You alone, and You hereby agree to indemnify every Contributor for any
214 | liability incurred by such Contributor as a result of warranty, support,
215 | indemnity or liability terms You offer. You may include additional
216 | disclaimers of warranty and limitations of liability specific to any
217 | jurisdiction.
218 | 
219 | 4. Inability to Comply Due to Statute or Regulation
220 | ---------------------------------------------------
221 | 
222 | If it is impossible for You to comply with any of the terms of this
223 | License with respect to some or all of the Covered Software due to
224 | statute, judicial order, or regulation then You must: (a) comply with
225 | the terms of this License to the maximum extent possible; and (b)
226 | describe the limitations and the code they affect. Such description must
227 | be placed in a text file included with all distributions of the Covered
228 | Software under this License. Except to the extent prohibited by statute
229 | or regulation, such description must be sufficiently detailed for a
230 | recipient of ordinary skill to be able to understand it.
231 | 
232 | 5. Termination
233 | --------------
234 | 
235 | 5.1. The rights granted under this License will terminate automatically
236 | if You fail to comply with any of its terms. However, if You become
237 | compliant, then the rights granted under this License from a particular
238 | Contributor are reinstated (a) provisionally, unless and until such
239 | Contributor explicitly and finally terminates Your grants, and (b) on an
240 | ongoing basis, if such Contributor fails to notify You of the
241 | non-compliance by some reasonable means prior to 60 days after You have
242 | come back into compliance. Moreover, Your grants from a particular
243 | Contributor are reinstated on an ongoing basis if such Contributor
244 | notifies You of the non-compliance by some reasonable means, this is the
245 | first time You have received notice of non-compliance with this License
246 | from such Contributor, and You become compliant prior to 30 days after
247 | Your receipt of the notice.
248 | 
249 | 5.2. If You initiate litigation against any entity by asserting a patent
250 | infringement claim (excluding declaratory judgment actions,
251 | counter-claims, and cross-claims) alleging that a Contributor Version
252 | directly or indirectly infringes any patent, then the rights granted to
253 | You by any and all Contributors for the Covered Software under Section
254 | 2.1 of this License shall terminate.
255 | 
256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257 | end user license agreements (excluding distributors and resellers) which
258 | have been validly granted by You or Your distributors under this License
259 | prior to termination shall survive termination.
260 | 
261 | ************************************************************************
262 | *                                                                      *
263 | *  6. Disclaimer of Warranty                                           *
264 | *  -------------------------                                           *
265 | *                                                                      *
266 | *  Covered Software is provided under this License on an "as is"       *
267 | *  basis, without warranty of any kind, either expressed, implied, or  *
268 | *  statutory, including, without limitation, warranties that the       *
269 | *  Covered Software is free of defects, merchantable, fit for a        *
270 | *  particular purpose or non-infringing. The entire risk as to the     *
271 | *  quality and performance of the Covered Software is with You.        *
272 | *  Should any Covered Software prove defective in any respect, You     *
273 | *  (not any Contributor) assume the cost of any necessary servicing,   *
274 | *  repair, or correction. This disclaimer of warranty constitutes an   *
275 | *  essential part of this License. No use of any Covered Software is   *
276 | *  authorized under this License except under this disclaimer.         *
277 | *                                                                      *
278 | ************************************************************************
279 | 
280 | ************************************************************************
281 | *                                                                      *
282 | *  7. Limitation of Liability                                          *
283 | *  --------------------------                                          *
284 | *                                                                      *
285 | *  Under no circumstances and under no legal theory, whether tort      *
286 | *  (including negligence), contract, or otherwise, shall any           *
287 | *  Contributor, or anyone who distributes Covered Software as          *
288 | *  permitted above, be liable to You for any direct, indirect,         *
289 | *  special, incidental, or consequential damages of any character      *
290 | *  including, without limitation, damages for lost profits, loss of    *
291 | *  goodwill, work stoppage, computer failure or malfunction, or any    *
292 | *  and all other commercial damages or losses, even if such party      *
293 | *  shall have been informed of the possibility of such damages. This   *
294 | *  limitation of liability shall not apply to liability for death or   *
295 | *  personal injury resulting from such party's negligence to the       *
296 | *  extent applicable law prohibits such limitation. Some               *
297 | *  jurisdictions do not allow the exclusion or limitation of           *
298 | *  incidental or consequential damages, so this exclusion and          *
299 | *  limitation may not apply to You.                                    *
300 | *                                                                      *
301 | ************************************************************************
302 | 
303 | 8. Litigation
304 | -------------
305 | 
306 | Any litigation relating to this License may be brought only in the
307 | courts of a jurisdiction where the defendant maintains its principal
308 | place of business and such litigation shall be governed by laws of that
309 | jurisdiction, without reference to its conflict-of-law provisions.
310 | Nothing in this Section shall prevent a party's ability to bring
311 | cross-claims or counter-claims.
312 | 
313 | 9. Miscellaneous
314 | ----------------
315 | 
316 | This License represents the complete agreement concerning the subject
317 | matter hereof. If any provision of this License is held to be
318 | unenforceable, such provision shall be reformed only to the extent
319 | necessary to make it enforceable. Any law or regulation which provides
320 | that the language of a contract shall be construed against the drafter
321 | shall not be used to construe this License against a Contributor.
322 | 
323 | 10. Versions of the License
324 | ---------------------------
325 | 
326 | 10.1. New Versions
327 | 
328 | Mozilla Foundation is the license steward. Except as provided in Section
329 | 10.3, no one other than the license steward has the right to modify or
330 | publish new versions of this License. Each version will be given a
331 | distinguishing version number.
332 | 
333 | 10.2. Effect of New Versions
334 | 
335 | You may distribute the Covered Software under the terms of the version
336 | of the License under which You originally received the Covered Software,
337 | or under the terms of any subsequent version published by the license
338 | steward.
339 | 
340 | 10.3. Modified Versions
341 | 
342 | If you create software not governed by this License, and you want to
343 | create a new license for such software, you may create and use a
344 | modified version of this License if you rename the license and remove
345 | any references to the name of the license steward (except to note that
346 | such modified license differs from this License).
347 | 
348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
349 | Licenses
350 | 
351 | If You choose to distribute Source Code Form that is Incompatible With
352 | Secondary Licenses under the terms of this version of the License, the
353 | notice described in Exhibit B of this License must be attached.
354 | 
355 | Exhibit A - Source Code Form License Notice
356 | -------------------------------------------
357 | 
358 |   This Source Code Form is subject to the terms of the Mozilla Public
359 |   License, v. 2.0. If a copy of the MPL was not distributed with this
360 |   file, You can obtain one at http://mozilla.org/MPL/2.0/.
361 | 
362 | If it is not possible or desirable to put the notice in a particular
363 | file, then You may include the notice in a location (such as a LICENSE
364 | file in a relevant directory) where a recipient would be likely to look
365 | for such a notice.
366 | 
367 | You may add additional accurate notices of copyright ownership.
368 | 
369 | Exhibit B - "Incompatible With Secondary Licenses" Notice
370 | ---------------------------------------------------------
371 | 
372 |   This Source Code Form is "Incompatible With Secondary Licenses", as
373 |   defined by the Mozilla Public License, v. 2.0.
374 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # XTTS streaming server
 2 | *Warning: XTTS-streaming-server doesn't support concurrent streaming requests. It's a demo server, not meant for production.*
 3 | 
 4 | https://github.com/coqui-ai/xtts-streaming-server/assets/17219561/7220442a-e88a-4288-8a73-608c4b39d06c
 5 | 
 6 | 
 7 | ## 1) Run the server
 8 | 
 9 | ### Use a pre-built image
10 | 
11 | CUDA 12.1:
12 | 
13 | ```bash
14 | $ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cuda121
15 | ```
16 | 
17 | CUDA 11.8 (for older cards):
18 | 
19 | ```bash
20 | $ docker run --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest
21 | ```
22 | 
23 | CPU (not recommended):
24 | 
25 | ```bash
26 | $ docker run -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest-cpu
27 | ```
28 | 
29 | Run with a fine-tuned model:
30 | 
31 | Make sure the model folder `/path/to/model/folder`  contains the following files:
32 | - `config.json`
33 | - `model.pth`
34 | - `vocab.json`
35 | 
36 | ```bash
37 | $ docker run -v /path/to/model/folder:/app/tts_models --gpus=all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 ghcr.io/coqui-ai/xtts-streaming-server:latest
38 | ```
39 | 
40 | Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
41 | the terms of the [CPML license](https://coqui.ai/cpml). (Fine-tuned XTTS models also are under the [CPML license](https://coqui.ai/cpml))
42 | 
43 | ### Build the image yourself
44 | 
45 | To build the Docker image yourself (the default `Dockerfile` uses PyTorch 2.1 and CUDA 11.8):
46 | 
47 | `DOCKERFILE` may be `Dockerfile`, `Dockerfile.cpu`, `Dockerfile.cuda121`, or your own custom Dockerfile.
48 | 
49 | ```bash
50 | $ git clone git@github.com:coqui-ai/xtts-streaming-server.git
51 | $ cd xtts-streaming-server/server
52 | $ docker build -t xtts-stream . -f DOCKERFILE
53 | $ docker run --gpus all -e COQUI_TOS_AGREED=1 --rm -p 8000:80 xtts-stream
54 | ```
55 | 
56 | Setting the `COQUI_TOS_AGREED` environment variable to `1` indicates you have read and agreed to
57 | the terms of the [CPML license](https://coqui.ai/cpml). (Fine-tuned XTTS models also are under the [CPML license](https://coqui.ai/cpml))
58 | 
59 | ## 2) Testing the running server
60 | 
61 | Once your Docker container is running, you can test that it's working properly. You will need to run the following code from a fresh terminal.
62 | 
63 | ### Clone `xtts-streaming-server` if you haven't already
64 | 
65 | ```bash
66 | $ git clone git@github.com:coqui-ai/xtts-streaming-server.git
67 | ```
68 | 
69 | ### Using the gradio demo
70 | 
71 | ```bash
72 | $ cd xtts-streaming-server
73 | $ python -m pip install -r test/requirements.txt
74 | $ python demo.py
75 | ```
76 | 
77 | ### Using the test script
78 | 
79 | ```bash
80 | $ cd xtts-streaming-server/test
81 | $ python -m pip install -r requirements.txt
82 | $ python test_streaming.py
83 | ```
84 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
  1 | import gradio as gr
  2 | import requests
  3 | import base64
  4 | import tempfile
  5 | import json
  6 | import os
  7 | 
  8 | 
  9 | SERVER_URL = 'http://localhost:8000'  # base URL every request in this demo is sent to
 10 | OUTPUT = "./demo_outputs"  # root folder holding cloned_speakers/ and generated_audios/
 11 | cloned_speakers = {}  # speaker name -> embeddings dict, filled from saved JSON files and by clone_speaker()
 12 | 
 13 | print("Preparing file structure...")
 14 | if not os.path.exists(OUTPUT):
 15 |     os.mkdir(OUTPUT)
 16 |     os.mkdir(os.path.join(OUTPUT, "cloned_speakers"))
 17 |     os.mkdir(os.path.join(OUTPUT, "generated_audios"))
 18 | elif os.path.exists(os.path.join(OUTPUT, "cloned_speakers")):
 19 |     print("Loading existing cloned speakers...")
 20 |     for file in os.listdir(os.path.join(OUTPUT, "cloned_speakers")):
 21 |         if file.endswith(".json"):
 22 |             with open(os.path.join(OUTPUT, "cloned_speakers", file), "r") as fp:
 23 |                 cloned_speakers[file[:-5]] = json.load(fp)
 24 |     print("Available cloned speakers:", ", ".join(cloned_speakers.keys()))
 25 | 
 26 | try:
 27 |     print("Getting metadata from server ...")
 28 |     LANUGAGES = requests.get(SERVER_URL + "/languages").json()
 29 |     print("Available languages:", ", ".join(LANUGAGES))
 30 |     STUDIO_SPEAKERS = requests.get(SERVER_URL + "/studio_speakers").json()
 31 |     print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
 32 | except:
 33 |     raise Exception("Please make sure the server is running first.")
 34 | 
 35 | 
 36 | def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
 37 |     files = {"wav_file": ("reference.wav", open(upload_file, "rb"))}
 38 |     embeddings = requests.post(SERVER_URL + "/clone_speaker", files=files).json()
 39 |     with open(os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json"), "w") as fp:
 40 |         json.dump(embeddings, fp)
 41 |     cloned_speakers[clone_speaker_name] = embeddings
 42 |     cloned_speaker_names.append(clone_speaker_name)
 43 |     return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown.update(choices=cloned_speaker_names)
 44 | 
 45 | def tts(text, speaker_type, speaker_name_studio, speaker_name_custom, lang):
 46 |     embeddings = STUDIO_SPEAKERS[speaker_name_studio] if speaker_type == 'Studio' else cloned_speakers[speaker_name_custom]
 47 |     generated_audio = requests.post(
 48 |         SERVER_URL + "/tts",
 49 |         json={
 50 |             "text": text,
 51 |             "language": lang,
 52 |             "speaker_embedding": embeddings["speaker_embedding"],
 53 |             "gpt_cond_latent": embeddings["gpt_cond_latent"]
 54 |         }
 55 |     ).content
 56 |     generated_audio_path = os.path.join("demo_outputs", "generated_audios", next(tempfile._get_candidate_names()) + ".wav")
 57 |     with open(generated_audio_path, "wb") as fp:
 58 |         fp.write(base64.b64decode(generated_audio))
 59 |         return fp.name
 60 | 
# Build the Gradio UI: a "TTS" tab for synthesis and a second tab for cloning
# a new speaker, then wire both buttons to their handler functions.
with gr.Blocks() as demo:
    # Gradio state holding the cloned speaker names, seeded from the speakers
    # already present in `cloned_speakers`.
    cloned_speaker_names = gr.State(list(cloned_speakers.keys()))
    with gr.Tab("TTS"):
        # NOTE(review): `row4` is bound to a Column and `col4` to a Row; the
        # names do not match the layout types. Neither name is used below.
        with gr.Column() as row4:
            with gr.Row() as col4:
                speaker_name_studio = gr.Dropdown(
                    label="Studio speaker",
                    choices=STUDIO_SPEAKERS.keys(),
                    # Preselect "Asya Anara" only if the server offers it.
                    value="Asya Anara" if "Asya Anara" in STUDIO_SPEAKERS.keys() else None,
                )
                speaker_name_custom = gr.Dropdown(
                    label="Cloned speaker",
                    choices=cloned_speaker_names.value,
                    # Preselect the first cloned speaker, if there is one.
                    value=cloned_speaker_names.value[0] if len(cloned_speaker_names.value) != 0 else None,
                )
            # Chooses which of the two dropdowns above tts() reads from.
            speaker_type = gr.Dropdown(label="Speaker type", choices=["Studio", "Cloned"], value="Studio")
        with gr.Column() as col2:
            lang = gr.Dropdown(label="Language", choices=LANUGAGES, value="en")
            text = gr.Textbox(label="text", value="A quick brown fox jumps over the lazy dog.")
            tts_button = gr.Button(value="TTS")
        with gr.Column() as col3:
            generated_audio = gr.Audio(label="Generated audio", autoplay=True)
    with gr.Tab("Clone a new speaker"):
        with gr.Column() as col1:
            # type="filepath" hands clone_speaker() a path on disk.
            upload_file = gr.Audio(label="Upload reference audio", type="filepath")
            clone_speaker_name = gr.Textbox(label="Speaker name", value="default_speaker")
            clone_button = gr.Button(value="Clone speaker")

    # clone_speaker() returns one value per entry in `outputs`; the last one
    # updates the choices of the cloned-speaker dropdown on the TTS tab.
    clone_button.click(
        fn=clone_speaker,
        inputs=[upload_file, clone_speaker_name, cloned_speaker_names],
        outputs=[upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom],
    )

    # tts() returns the path of the generated wav, shown in the audio player.
    tts_button.click(
        fn=tts,
        inputs=[text, speaker_type, speaker_name_studio, speaker_name_custom, lang],
        outputs=[generated_audio],
    )
100 | 
if __name__ == "__main__":
    # Issue one /tts request with the bundled default speaker and require it
    # to succeed before the UI is launched.
    print("Warming up server...")
    with open("test/default_speaker.json", "r") as fp:
        warmup_speaker = json.load(fp)
    warmup_payload = {
        "text": "This is a warmup request.",
        "language": "en",
        "speaker_embedding": warmup_speaker["speaker_embedding"],
        "gpt_cond_latent": warmup_speaker["gpt_cond_latent"],
    }
    resp = requests.post(SERVER_URL + "/tts", json=warmup_payload)
    resp.raise_for_status()

    # Serve the demo on all interfaces, port 3009, without a public share link.
    print("Starting the demo...")
    launch_options = {
        "share": False,
        "debug": False,
        "server_port": 3009,
        "server_name": "0.0.0.0",
    }
    demo.launch(**launch_options)
122 | 


--------------------------------------------------------------------------------
/server/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-devel
 2 | ARG DEBIAN_FRONTEND=noninteractive
 3 | 
 4 | RUN apt-get update && \
 5 |     apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
 6 |     apt-get clean && apt-get -y autoremove
 7 | 
 8 | WORKDIR /app
 9 | COPY requirements.txt .
10 | RUN python -m pip install --use-deprecated=legacy-resolver -r requirements.txt \
11 |     && python -m pip cache purge
12 | 
13 | RUN python -m unidic download
14 | RUN mkdir -p /app/tts_models
15 | 
16 | COPY main.py .
17 | ENV NVIDIA_DISABLE_REQUIRE=1
18 | 
19 | ENV NUM_THREADS=2
20 | EXPOSE 80
21 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
22 | 


--------------------------------------------------------------------------------
/server/Dockerfile.cpu:
--------------------------------------------------------------------------------
 1 | FROM python:3.11.7
 2 | ARG DEBIAN_FRONTEND=noninteractive
 3 | 
 4 | RUN apt-get update && \
 5 |     apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
 6 |     apt-get clean && apt-get -y autoremove
 7 | 
 8 | WORKDIR /app
 9 | COPY requirements_cpu.txt .
10 | RUN python -m pip install --use-deprecated=legacy-resolver -r requirements_cpu.txt \
11 |     && python -m pip cache purge
12 | 
13 | RUN python -m unidic download
14 | RUN mkdir -p /app/tts_models
15 | 
16 | COPY main.py .
17 | ENV USE_CPU=1
18 | 
19 | EXPOSE 80
20 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
21 | 


--------------------------------------------------------------------------------
/server/Dockerfile.cuda121:
--------------------------------------------------------------------------------
 1 | FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel
 2 | ARG DEBIAN_FRONTEND=noninteractive
 3 | 
 4 | RUN apt-get update && \
 5 |     apt-get install --no-install-recommends -y sox libsox-fmt-all curl wget gcc git git-lfs build-essential libaio-dev libsndfile1 ssh ffmpeg && \
 6 |     apt-get clean && apt-get -y autoremove
 7 | 
 8 | WORKDIR /app
 9 | COPY requirements.txt .
10 | RUN python -m pip install --use-deprecated=legacy-resolver -r requirements.txt \
11 |     && python -m pip cache purge
12 | 
13 | RUN python -m unidic download
14 | RUN mkdir -p /app/tts_models
15 | 
16 | COPY main.py .
17 | 
18 | #Mark this 1 if you have older card
19 | ENV NVIDIA_DISABLE_REQUIRE=0
20 | 
21 | ENV NUM_THREADS=2
22 | EXPOSE 80
23 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
24 | 


--------------------------------------------------------------------------------
/server/main.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import io
  3 | import os
  4 | import tempfile
  5 | import wave
  6 | import torch
  7 | import numpy as np
  8 | from typing import List
  9 | from pydantic import BaseModel
 10 | 
 11 | from fastapi import FastAPI, UploadFile, Body
 12 | from fastapi.responses import StreamingResponse
 13 | 
 14 | from TTS.tts.configs.xtts_config import XttsConfig
 15 | from TTS.tts.models.xtts import Xtts
 16 | from TTS.utils.generic_utils import get_user_data_dir
 17 | from TTS.utils.manage import ModelManager
 18 | 
 19 | torch.set_num_threads(int(os.environ.get("NUM_THREADS", os.cpu_count())))
 20 | device = torch.device("cuda" if os.environ.get("USE_CPU", "0") == "0" else "cpu")
 21 | if not torch.cuda.is_available() and device == "cuda":
 22 |     raise RuntimeError("CUDA device unavailable, please use Dockerfile.cpu instead.") 
 23 | 
 24 | custom_model_path = os.environ.get("CUSTOM_MODEL_PATH", "/app/tts_models")
 25 | 
 26 | if os.path.exists(custom_model_path) and os.path.isfile(custom_model_path + "/config.json"):
 27 |     model_path = custom_model_path
 28 |     print("Loading custom model from", model_path, flush=True)
 29 | else:
 30 |     print("Loading default model", flush=True)
 31 |     model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
 32 |     print("Downloading XTTS Model:", model_name, flush=True)
 33 |     ModelManager().download_model(model_name)
 34 |     model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
 35 |     print("XTTS Model downloaded", flush=True)
 36 | 
 37 | print("Loading XTTS", flush=True)
 38 | config = XttsConfig()
 39 | config.load_json(os.path.join(model_path, "config.json"))
 40 | model = Xtts.init_from_config(config)
 41 | model.load_checkpoint(config, checkpoint_dir=model_path, eval=True, use_deepspeed=True if device == "cuda" else False)
 42 | model.to(device)
 43 | print("XTTS Loaded.", flush=True)
 44 | 
 45 | print("Running XTTS Server ...", flush=True)
 46 | 
 47 | ##### Run fastapi #####
 48 | app = FastAPI(
 49 |     title="XTTS Streaming server",
 50 |     description="""XTTS Streaming server""",
 51 |     version="0.0.1",
 52 |     docs_url="/",
 53 | )
 54 | 
 55 | 
 56 | @app.post("/clone_speaker")
 57 | def predict_speaker(wav_file: UploadFile):
 58 |     """Compute conditioning inputs from reference audio file."""
 59 |     temp_audio_name = next(tempfile._get_candidate_names())
 60 |     with open(temp_audio_name, "wb") as temp, torch.inference_mode():
 61 |         temp.write(io.BytesIO(wav_file.file.read()).getbuffer())
 62 |         gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(
 63 |             temp_audio_name
 64 |         )
 65 |     return {
 66 |         "gpt_cond_latent": gpt_cond_latent.cpu().squeeze().half().tolist(),
 67 |         "speaker_embedding": speaker_embedding.cpu().squeeze().half().tolist(),
 68 |     }
 69 | 
 70 | 
 71 | def postprocess(wav):
 72 |     """Post process the output waveform"""
 73 |     if isinstance(wav, list):
 74 |         wav = torch.cat(wav, dim=0)
 75 |     wav = wav.clone().detach().cpu().numpy()
 76 |     wav = wav[None, : int(wav.shape[0])]
 77 |     wav = np.clip(wav, -1, 1)
 78 |     wav = (wav * 32767).astype(np.int16)
 79 |     return wav
 80 | 
 81 | 
 82 | def encode_audio_common(
 83 |     frame_input, encode_base64=True, sample_rate=24000, sample_width=2, channels=1
 84 | ):
 85 |     """Return base64 encoded audio"""
 86 |     wav_buf = io.BytesIO()
 87 |     with wave.open(wav_buf, "wb") as vfout:
 88 |         vfout.setnchannels(channels)
 89 |         vfout.setsampwidth(sample_width)
 90 |         vfout.setframerate(sample_rate)
 91 |         vfout.writeframes(frame_input)
 92 | 
 93 |     wav_buf.seek(0)
 94 |     if encode_base64:
 95 |         b64_encoded = base64.b64encode(wav_buf.getbuffer()).decode("utf-8")
 96 |         return b64_encoded
 97 |     else:
 98 |         return wav_buf.read()
 99 | 
100 | 
class StreamingInputs(BaseModel):
    # Request body of the /tts_stream endpoint.
    # Speaker embedding as a flat list of floats.
    speaker_embedding: List[float]
    # Conditioning latents as rows of floats; the streaming generator reshapes
    # them to (-1, 1024).
    gpt_cond_latent: List[List[float]]
    # Text to synthesize.
    text: str
    # Language passed to the model together with the text.
    language: str
    # When true, a WAV header is sent before the first audio chunk.
    add_wav_header: bool = True
    # Chunk size for streaming inference; sent as a string and converted with
    # int() by the generator.
    stream_chunk_size: str = "20"
108 | 
109 | 
def predict_streaming_generator(parsed_input: StreamingInputs = Body(...)):
    """Yield synthesized audio for a streaming request, chunk by chunk.

    Args:
        parsed_input: The validated ``StreamingInputs`` request. It is read
            through attributes, so the annotation names the model class
            rather than ``dict``. The ``Body(...)`` default is kept only so
            the signature stays unchanged.

    Yields:
        ``bytes``: an empty-payload WAV header first when
        ``add_wav_header`` is set, then the int16 PCM bytes of each chunk.
    """
    # Rebuild the tensors the model takes from the JSON lists.
    speaker_embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
    gpt_cond_latent = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)
    text = parsed_input.text
    language = parsed_input.language

    stream_chunk_size = int(parsed_input.stream_chunk_size)
    add_wav_header = parsed_input.add_wav_header

    chunks = model.inference_stream(
        text,
        language,
        gpt_cond_latent,
        speaker_embedding,
        stream_chunk_size=stream_chunk_size,
        enable_text_splitting=True
    )

    for i, chunk in enumerate(chunks):
        chunk = postprocess(chunk)
        # The header is emitted once, right before the first chunk.
        if i == 0 and add_wav_header:
            yield encode_audio_common(b"", encode_base64=False)
        yield chunk.tobytes()
136 | 
137 | 
@app.post("/tts_stream")
def predict_streaming_endpoint(parsed_input: StreamingInputs):
    # Stream the generator's output to the client as audio/wav.
    audio_chunks = predict_streaming_generator(parsed_input)
    return StreamingResponse(audio_chunks, media_type="audio/wav")
144 | 
class TTSInputs(BaseModel):
    # Request body of the non-streaming /tts endpoint.
    # Speaker embedding as a flat list of floats.
    speaker_embedding: List[float]
    # Conditioning latents as rows of floats; the endpoint reshapes them to
    # (-1, 1024).
    gpt_cond_latent: List[List[float]]
    # Text to synthesize.
    text: str
    # Language passed to the model together with the text.
    language: str
150 | 
@app.post("/tts")
def predict_speech(parsed_input: TTSInputs):
    # Synthesize the whole utterance in one call and return it as a
    # base64-encoded WAV string.
    embedding = torch.tensor(parsed_input.speaker_embedding).unsqueeze(0).unsqueeze(-1)
    latents = torch.tensor(parsed_input.gpt_cond_latent).reshape((-1, 1024)).unsqueeze(0)

    result = model.inference(
        parsed_input.text,
        parsed_input.language,
        latents,
        embedding,
    )

    pcm = postprocess(torch.tensor(result["wav"]))
    return encode_audio_common(pcm.tobytes())
168 | 
169 | 
@app.get("/studio_speakers")
def get_speakers():
    # Return the embeddings of every speaker known to the model's speaker
    # manager, or an empty mapping when the model has none.
    has_speakers = hasattr(model, "speaker_manager") and hasattr(model.speaker_manager, "speakers")
    if not has_speakers:
        return {}

    speakers = model.speaker_manager.speakers

    def as_half_list(tensor):
        # Same conversion as /clone_speaker: CPU, squeezed, half precision.
        return tensor.cpu().squeeze().half().tolist()

    return {
        name: {
            "speaker_embedding": as_half_list(speakers[name]["speaker_embedding"]),
            "gpt_cond_latent": as_half_list(speakers[name]["gpt_cond_latent"]),
        }
        for name in speakers.keys()
    }
182 |         
@app.get("/languages")
def get_languages():
    # Languages listed in the loaded model configuration.
    return config.languages


--------------------------------------------------------------------------------
/server/requirements.txt:
--------------------------------------------------------------------------------
 1 | TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
 2 | uvicorn[standard]==0.23.2
 3 | fastapi==0.95.2
 4 | deepspeed==0.10.3
 5 | pydantic==1.10.13
 6 | python-multipart==0.0.6
 7 | typing-extensions>=4.8.0
 8 | numpy==1.24.3
 9 | cutlet
10 | mecab-python3==1.0.6
11 | unidic-lite==1.0.8
12 | unidic==1.1.0
13 | 


--------------------------------------------------------------------------------
/server/requirements_cpu.txt:
--------------------------------------------------------------------------------
 1 | TTS @ git+https://github.com/coqui-ai/TTS@fa28f99f1508b5b5366539b2149963edcb80ba62
 2 | uvicorn[standard]==0.23.2
 3 | fastapi==0.95.2
 4 | pydantic==1.10.13
 5 | python-multipart==0.0.6
 6 | typing-extensions>=4.8.0
 7 | numpy==1.24.3
 8 | cutlet
 9 | mecab-python3==1.0.6
10 | unidic-lite==1.0.8
11 | unidic==1.1.0
12 | 


--------------------------------------------------------------------------------
/test/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.31.0
2 | gradio==3.50.2
3 | 


--------------------------------------------------------------------------------
/test/test_streaming.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import json
  3 | import shutil
  4 | import subprocess
  5 | import sys
  6 | import time
  7 | from typing import Iterator
  8 | 
  9 | import requests
 10 | 
 11 | 
 12 | def is_installed(lib_name: str) -> bool:
 13 |     lib = shutil.which(lib_name)
 14 |     if lib is None:
 15 |         return False
 16 |     return True
 17 | 
 18 | 
 19 | def save(audio: bytes, filename: str) -> None:
 20 |     with open(filename, "wb") as f:
 21 |         f.write(audio)
 22 | 
 23 | 
 24 | def stream_ffplay(audio_stream, output_file, save=True):
 25 |     if not save:
 26 |         ffplay_cmd = ["ffplay", "-nodisp", "-probesize", "1024", "-autoexit", "-"]
 27 |     else:
 28 |         print("Saving to ", output_file)
 29 |         ffplay_cmd = ["ffmpeg", "-probesize", "1024", "-i", "-", output_file]
 30 | 
 31 |     ffplay_proc = subprocess.Popen(ffplay_cmd, stdin=subprocess.PIPE)
 32 |     for chunk in audio_stream:
 33 |         if chunk is not None:
 34 |             ffplay_proc.stdin.write(chunk)
 35 | 
 36 |     # close on finish
 37 |     ffplay_proc.stdin.close()
 38 |     ffplay_proc.wait()
 39 | 
 40 | 
 41 | def tts(text, speaker, language, server_url, stream_chunk_size) -> Iterator[bytes]:
 42 |     start = time.perf_counter()
 43 |     speaker["text"] = text
 44 |     speaker["language"] = language
 45 |     speaker["stream_chunk_size"] = stream_chunk_size  # you can reduce it to get faster response, but degrade quality
 46 |     res = requests.post(
 47 |         f"{server_url}/tts_stream",
 48 |         json=speaker,
 49 |         stream=True,
 50 |     )
 51 |     end = time.perf_counter()
 52 |     print(f"Time to make POST: {end-start}s", file=sys.stderr)
 53 | 
 54 |     if res.status_code != 200:
 55 |         print("Error:", res.text)
 56 |         sys.exit(1)
 57 | 
 58 |     first = True
 59 |     for chunk in res.iter_content(chunk_size=512):
 60 |         if first:
 61 |             end = time.perf_counter()
 62 |             print(f"Time to first chunk: {end-start}s", file=sys.stderr)
 63 |             first = False
 64 |         if chunk:
 65 |             yield chunk
 66 | 
 67 |     print("⏱️ response.elapsed:", res.elapsed)
 68 | 
 69 | 
 70 | def get_speaker(ref_audio,server_url):
 71 |     files = {"wav_file": ("reference.wav", open(ref_audio, "rb"))}
 72 |     response = requests.post(f"{server_url}/clone_speaker", files=files)
 73 |     return response.json()
 74 | 
 75 | 
 76 | if __name__ == "__main__":
 77 |     parser = argparse.ArgumentParser()
 78 |     parser.add_argument(
 79 |         "--text",
 80 |         default="It took me quite a long time to develop a voice and now that I have it I am not going to be silent.",
 81 |         help="text input for TTS"
 82 |     )
 83 |     parser.add_argument(
 84 |         "--language",
 85 |         default="en",
 86 |         help="Language to use default is 'en'  (English)"
 87 |     )
 88 |     parser.add_argument(
 89 |         "--output_file",
 90 |         default=None,
 91 |         help="Save TTS output to given filename"
 92 |     )
 93 |     parser.add_argument(
 94 |         "--ref_file",
 95 |         default=None,
 96 |         help="Reference audio file to use, when not given will use default"
 97 |     )
 98 |     parser.add_argument(
 99 |         "--server_url",
100 |         default="http://localhost:8000",
101 |         help="Server url http://localhost:8000 default, change to your server location "
102 |     )
103 |     parser.add_argument(
104 |         "--stream_chunk_size",
105 |         default="20",
106 |         help="Stream chunk size , 20 default, reducing will get faster latency but may degrade quality"
107 |     )
108 |     args = parser.parse_args()
109 | 
110 |     with open("./default_speaker.json", "r") as file:
111 |         speaker = json.load(file)
112 | 
113 |     if args.ref_file is not None:
114 |         print("Computing the latents for a new reference...")
115 |         speaker = get_speaker(args.ref_file, args.server_url)
116 | 
117 |     audio = stream_ffplay(
118 |         tts(
119 |             args.text,
120 |             speaker,
121 |             args.language,
122 |             args.server_url,
123 |             args.stream_chunk_size
124 |         ), 
125 |         args.output_file,
126 |         save=bool(args.output_file)
127 |     )
128 | 


--------------------------------------------------------------------------------