├── app.py
├── examples
    ├── female.wav
    └── male.wav
├── icon.png
├── install.json
├── pinokio.js
├── pinokio_meta.json
├── requirements.txt
├── reset.js
├── start.json
├── torch.js
└── update.js


/app.py:
--------------------------------------------------------------------------------
  1 | import gradio as gr
  2 | from TTS.api import TTS
  3 | import torch
  4 | 
  5 | if torch.cuda.is_available():
  6 |     device = "cuda"
  7 | elif torch.backends.mps.is_available():
  8 |     device = "cpu"  # mps doesn't work yet
  9 | else:
 10 |     device = "cpu"
 11 | 
 12 | torch.set_default_device(device)
 13 | tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
 14 | tts.to(device)
 15 | 
 16 | def predict(prompt, language, audio_file_pth, agree):
 17 |     if agree == True:
 18 |         tts.tts_to_file(
 19 |             text=prompt,
 20 |             file_path="output.wav",
 21 |             speaker_wav=audio_file_pth,
 22 |             language=language,
 23 |         )
 24 | 
 25 |         return (
 26 |             gr.make_waveform(
 27 |                 audio="output.wav",
 28 |             ),
 29 |             "output.wav",
 30 |         )
 31 |     else:
 32 |         gr.Warning("Please accept the Terms & Condition!")
 33 | 
 34 | 
 35 | title = "Coqui🐸 XTTS"  # handed to gr.Interface as `title`
 36 | 
 37 | description = """
 38 | <a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip. 
 39 | <br/>
 40 | Built on Tortoise, XTTS has important model changes that make cross-language voice cloning and multi-lingual speech generation super easy. 
 41 | <br/>
 42 | This is the same model that powers Coqui Studio, and Coqui API, however we apply a few tricks to make it faster and support streaming inference.
 43 | """  # HTML intro text handed to gr.Interface as `description`
 44 | 
 45 | article = """
 46 | <div style='margin:20px auto;'>
 47 | <p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
 48 | </div>
 49 | """  # licence notice handed to gr.Interface as `article`
 50 | 
 51 | examples = [  # rows follow predict's parameter order: prompt, language, audio_file_pth, agree
 52 |     [
 53 |         "Once when I was six years old I saw a magnificent picture.",
 54 |         "en",
 55 |         "examples/female.wav",
 56 |         True,
 57 |     ],
 58 |     [
 59 |         "Lorsque j'avais six ans j'ai vu, une fois, une magnifique image.",
 60 |         "fr",
 61 |         "examples/male.wav",
 62 |         True,
 63 |     ],
 64 |     [
 65 |         "Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno.",
 66 |         "it",
 67 |         "examples/female.wav",
 68 |         True,
 69 |     ],
 70 | ]
 71 | 
 72 | gr.Interface(
 73 |     fn=predict,
 74 |     inputs=[
 75 |         gr.Textbox(
 76 |             label="Text Prompt",
 77 |             info="One or two sentences at a time is better",
 78 |             value="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
 79 |         ),
 80 |         gr.Dropdown(
 81 |             label="Language",
 82 |             info="Select an output language for the synthesised speech",
 83 |             choices=[
 84 |                 "en",
 85 |                 "es",
 86 |                 "fr",
 87 |                 "de",
 88 |                 "it",
 89 |                 "pt",
 90 |                 "pl",
 91 |                 "tr",
 92 |                 "ru",
 93 |                 "nl",
 94 |                 "cs",
 95 |                 "ar",
 96 |                 "zh-cn",
 97 |                 "ja",
 98 |                 "ko",
 99 |                 "hu",
100 |                 "hi"
101 |             ],
102 |             max_choices=1,
103 |             value="en",
104 |         ),
105 |         gr.Audio(
106 |             label="Reference Audio",
107 |             type="filepath",
108 |             value="examples/female.wav",
109 |         ),
110 |         gr.Checkbox(
111 |             label="Agree",
112 |             value=False,
113 |             info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
114 |         ),
115 |     ],
116 |     outputs=[
117 |         gr.Video(label="Waveform Visual"),
118 |         gr.Audio(label="Synthesised Audio"),
119 |     ],
120 |     title=title,
121 |     description=description,
122 |     article=article,
123 |     examples=examples,
124 | ).queue().launch(debug=True)
125 | 


--------------------------------------------------------------------------------
/examples/female.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cocktailpeanut/xtts.pinokio/b8b0aa717249a7710785f64fcc05e9aced34cc9f/examples/female.wav


--------------------------------------------------------------------------------
/examples/male.wav:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:937c74afad004937e00d1687c68e02210e0c5d93ac072a7c8aeb9ab573517bb1
3 | size 762126
4 | 


--------------------------------------------------------------------------------
/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cocktailpeanut/xtts.pinokio/b8b0aa717249a7710785f64fcc05e9aced34cc9f/icon.png


--------------------------------------------------------------------------------
/install.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "run": [{
 3 |     "method": "shell.run",
 4 |     "params": {
 5 |       "message": "{{os.platform() === 'win32' ? '' : 'brew install mecab'}}"
 6 |     }
 7 |   },
 8 |   {
 9 |     "method": "shell.run",
10 |     "params": {
11 |       "venv": "env",
12 |       "message": [
13 |         "uv pip install gradio==4.44.0 pydantic==2.10.6",
14 |         "uv pip install -r requirements.txt"
15 |       ]
16 |     }
17 |   },
18 |   {
19 |     "method": "script.start",
20 |     "params": {
21 |       "uri": "torch.js",
22 |       "params": {
23 |         "venv": "env"
24 |       }
25 |     }
26 |   },
27 |   {
28 |     "method": "notify",
29 |     "params": { "html": "Click the 'start' tab to get started!" }
30 |   }]
31 | }
32 | 


--------------------------------------------------------------------------------
/pinokio.js:
--------------------------------------------------------------------------------
 1 | const path = require("path")
 2 | module.exports = {
 3 |   version: "3.0",
 4 |   title: "XTTS",
 5 |   description: "clone voices into different languages by using just a quick 3-second audio clip. (a local version of https://huggingface.co/spaces/coqui/xtts)",
 6 |   icon: "icon.png",
 7 |   menu: async (kernel, info) => {
 8 |     let installed = info.exists("env")
 9 |     let running = {
10 |       install: info.running("install.json"),
11 |       start: info.running("start.json"),
12 |       update: info.running("update.js"),
13 |       reset: info.running("reset.js")
14 |     }
15 |     if (running.install) {
16 |       return [{
17 |         default: true,
18 |         icon: "fa-solid fa-plug",
19 |         text: "Installing",
20 |         href: "install.json",
21 |       }]
22 |     } else if (running.update) {
23 |       return [{
24 |         default: true,
25 |         icon: 'fa-solid fa-terminal',
26 |         text: "Updating",
27 |         href: "update.js",
28 |       }]
29 |     } else if (installed) {
30 |       if (running.start) {
31 |         let local = info.local("start.json")
32 |         if (local && local.url) {
33 |           return [{
34 |             default: true,
35 |             icon: "fa-solid fa-rocket",
36 |             text: "Open Web UI",
37 |             href: local.url,
38 |           }, {
39 |             icon: 'fa-solid fa-terminal',
40 |             text: "Terminal",
41 |             href: "start.json",
42 |           }]
43 |         } else {
44 |           return [{
45 |             default: true,
46 |             icon: 'fa-solid fa-terminal',
47 |             text: "Terminal",
48 |             href: "start.json",
49 |           }]
50 |         }
51 |       } else if (running.reset) {
52 |           return [{
53 |             default: true,
54 |             icon: 'fa-solid fa-terminal',
55 |             text: "Resetting",
56 |             href: "reset.js",
57 |           }]
58 |       } else {
59 |         return [{
60 |           default: true,
61 |           icon: "fa-solid fa-power-off",
62 |           text: "Start",
63 |           href: "start.json",
64 |         }, {
65 |           icon: "fa-solid fa-plug",
66 |           text: "Update",
67 |           href: "update.js",
68 |         }, {
69 |           icon: "fa-solid fa-plug",
70 |           text: "Install",
71 |           href: "install.json",
72 |         }, {
73 |           icon: "fa-regular fa-circle-xmark",
74 |           text: "Reset",
75 |           href: "reset.js",
76 |         }]
77 |       }
78 |     } else {
79 |       return [{
80 |         default: true,
81 |         icon: "fa-solid fa-plug",
82 |         text: "Install",
83 |         href: "install.json",
84 |       }]
85 |     }
86 |   }
87 | }
88 | 


--------------------------------------------------------------------------------
/pinokio_meta.json:
--------------------------------------------------------------------------------
1 | {
2 |   "posts": [
3 |     "https://x.com/cocktailpeanut/status/1702759980263162098"
4 |   ]
5 | }
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | git+https://github.com/cocktailpeanut/TTS#egg=TTS
2 | 


--------------------------------------------------------------------------------
/reset.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   run: [{
3 |     method: "fs.rm",
4 |     params: {
5 |       path: "env"
6 |     }
7 |   }]
8 | }
9 | 


--------------------------------------------------------------------------------
/start.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "daemon": true,
 3 |   "run": [
 4 |     {
 5 |       "method": "shell.run",
 6 |       "params": {
 7 |         "venv": "env",
 8 |         "env": {
 9 |           "XDG_DATA_HOME": "./cache/HF_HOME/hub",
10 |           "ALLOW_FILE_UPLOAD": "True",
11 |           "COQUI_TOS_AGREED": "1"
12 |         },
13 |         "message": "python app.py",
14 |         "on": [{
15 |           "event": "/(http://[0-9.:]+)/",
16 |           "done": true
17 |         }]
18 |       }
19 |     }, {
20 |       "method": "local.set",
21 |       "params": {
22 |         "url": "{{input.event[0]}}"
23 |       }
24 |     } 
25 |   ]
26 | }
27 | 


--------------------------------------------------------------------------------
/torch.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |   run: [
 3 |     // nvidia 50 series
 4 |     {
 5 |       "when": "{{gpu === 'nvidia' && kernel.gpu_model && / 50.+/.test(kernel.gpu_model) }}",
 6 |       "method": "shell.run",
 7 |       "params": {
 8 |         "venv": "{{args && args.venv ? args.venv : null}}",
 9 |         "path": "{{args && args.path ? args.path : '.'}}",
10 |         "message": "uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128"
11 |       },
12 |       "next": null
13 |     },
14 |     // windows nvidia
15 |     {
16 |       "when": "{{platform === 'win32' && gpu === 'nvidia'}}",
17 |       "method": "shell.run",
18 |       "params": {
19 |         "venv": "{{args && args.venv ? args.venv : null}}",
20 |         "path": "{{args && args.path ? args.path : '.'}}",
21 |         "message": "uv pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 {{args && args.xformers ? 'xformers' : ''}} --index-url https://download.pytorch.org/whl/cu121"
22 |       }
23 |     },
24 |     // windows amd
25 |     {
26 |       "when": "{{platform === 'win32' && gpu === 'amd'}}",
27 |       "method": "shell.run",
28 |       "params": {
29 |         "venv": "{{args && args.venv ? args.venv : null}}",
30 |         "path": "{{args && args.path ? args.path : '.'}}",
31 |         "message": "uv pip install torch-directml torchaudio torchvision numpy==1.26.4"
32 |       }
33 |     },
34 |     // windows cpu
35 |     {
36 |       "when": "{{platform === 'win32' && (gpu !== 'nvidia' && gpu !== 'amd')}}",
37 |       "method": "shell.run",
38 |       "params": {
39 |         "venv": "{{args && args.venv ? args.venv : null}}",
40 |         "path": "{{args && args.path ? args.path : '.'}}",
41 |         "message": "uv pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 numpy==1.26.4"
42 |       }
43 |     },
44 |     // apple mac
45 |     {
46 |       "when": "{{platform === 'darwin' && arch === 'arm64'}}",
47 |       "method": "shell.run",
48 |       "params": {
49 |         "venv": "{{args && args.venv ? args.venv : null}}",
50 |         "path": "{{args && args.path ? args.path : '.'}}",
51 |         "message": "uv pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu"
52 |       }
53 |     },
54 |     // intel mac
55 |     {
56 |       "when": "{{platform === 'darwin' && arch !== 'arm64'}}",
57 |       "method": "shell.run",
58 |       "params": {
59 |         "venv": "{{args && args.venv ? args.venv : null}}",
60 |         "path": "{{args && args.path ? args.path : '.'}}",
61 |         "message": "uv pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu"
62 |       }
63 |     },
64 |     // linux nvidia
65 |     {
66 |       "when": "{{platform === 'linux' && gpu === 'nvidia'}}",
67 |       "method": "shell.run",
68 |       "params": {
69 |         "venv": "{{args && args.venv ? args.venv : null}}",
70 |         "path": "{{args && args.path ? args.path : '.'}}",
71 |         "message": "uv pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 {{args && args.xformers ? 'xformers' : ''}} --index-url https://download.pytorch.org/whl/cu121"
72 |       }
73 |     },
74 |     // linux rocm (amd)
75 |     {
76 |       "when": "{{platform === 'linux' && gpu === 'amd'}}",
77 |       "method": "shell.run",
78 |       "params": {
79 |         "venv": "{{args && args.venv ? args.venv : null}}",
80 |         "path": "{{args && args.path ? args.path : '.'}}",
81 |         "message": "uv pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/rocm6.2"
82 |       }
83 |     },
84 |     // linux cpu
85 |     {
86 |       "when": "{{platform === 'linux' && (gpu !== 'amd' && gpu !=='nvidia')}}",
87 |       "method": "shell.run",
88 |       "params": {
89 |         "venv": "{{args && args.venv ? args.venv : null}}",
90 |         "path": "{{args && args.path ? args.path : '.'}}",
91 |         "message": "uv pip install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu"
92 |       }
93 |     }
94 |   ]
95 | }
96 | 


--------------------------------------------------------------------------------
/update.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   run: [{
3 |     method: "shell.run",
4 |     params: {
5 |       message: "git pull"
6 |     }
7 |   }]
8 | }
9 | 


--------------------------------------------------------------------------------