├── README.md
├── data.json
├── input.txt
└── tts.py
/README.md:
--------------------------------------------------------------------------------
1 | # TTS-Grabber
2 | A quick tool I made about a year ago to download any text spoken by any TTS voice; there are currently 829 voices to choose from.
3 |
4 | The program splits the input into multiple files roughly every 1500 characters to avoid hitting cutoff limits from TTS providers.
5 |
6 | ## Usage:
7 | Edit `input.txt` to change the text to synthesize.
8 |
9 | You can run just `tts.py` without any parameters to open the voice selector with default settings.
10 |
11 | #### Parameters
12 | ```
13 | PARAMETER TYPE DESCRIPTION
14 | -h, -help --- Shows the help info.
15 | -v, -voice Int Sets the voice id to use.
16 | -s, -speed Int Sets the TTS voice speed (in percent).
17 | -vol, -volume Int Changes the TTS volume (in decibels).
18 | -pp, -period-pause Flt Sets how long the TTS should pause for at periods (in seconds).
19 | -cp, -comma-pause Flt Sets how long the TTS should pause for at commas (in seconds).
20 | -lp, -line-pause Flt Sets how long the TTS should pause for at newlines (in seconds).
21 | ```
22 |
23 | Example with parameters:
24 | `tts.py -v 777 -s 100 -vol 0 -pp 1 -cp 0.5 -lp 2`
25 |
26 | ###### absolutely no api abuse here
27 |
--------------------------------------------------------------------------------
/input.txt:
--------------------------------------------------------------------------------
1 | Here are SSML samples.
2 | I can pause .
3 | I can play a sound
4 | .
5 | I can speak in cardinals. Your number is 10.
6 | Or I can speak in ordinals. You are 10 in line.
7 | Or I can even speak in digits. The digits for ten are 10.
8 | I can also substitute phrases, like the W3C.
9 |
--------------------------------------------------------------------------------
/tts.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import requests
3 | import textwrap
4 | import os.path
5 | import time
6 | import json
7 | import sys
8 |
# Voice catalogue (filled from data.json further down) and the voice chosen on
# the previous run (filled from lastused.json when that file exists).
data, lastUsed = [], {}
11 |
def compare(dic1, dic2):
    """Three-way comparison of two voice entries for use with cmp_to_key.

    Each entry is reduced to a single string made of its "language",
    "gender" and "name" values concatenated in that order, and the two
    strings are compared.

    Returns 1 when dic1's string is greater, -1 when it is smaller and 0
    when both strings are equal.
    """
    def sort_string(entry):
        return entry["language"] + entry["gender"] + entry["name"]

    left = sort_string(dic1)
    right = sort_string(dic2)
    # bool arithmetic: True - False == 1, False - True == -1, otherwise 0.
    return (left > right) - (right > left)
22 |
# Print the usage text and quit when the script is called with exactly one
# argument and that argument starts with "-h" (case-insensitive).
_help_requested = len(sys.argv) == 2 and sys.argv[1].lower().startswith("-h")
if _help_requested:
    _help_lines = (
        "TTS-Grabber (https://github.com/BleachDev/TTS-Grabber)",
        "",
        "PARAMETER TYPE DESCRIPTION",
        "-h, -help --- Shows the help info.",
        "-v, -voice Int Sets the voice id to use.",
        "-s, -speed Int Sets the TTS voice speed (in percent).",
        "-vol, -volume Int Changes the TTS volume (in decibels).",
        "-pp, -period-pause Flt Sets how long the TTS should pause for at periods (in seconds).",
        "-cp, -comma-pause Flt Sets how long the TTS should pause for at commas (in seconds).",
        "-lp, -line-pause Flt Sets how long the TTS should pause for at newlines (in seconds).",
        "",
        "To see a list of the voices available, run the script without the -v parameter.",
    )
    print("\n".join(_help_lines))
    sys.exit()
38 |
39 |
# Command-line settings and their defaults. -1 means "not given" for the
# voice id and for the three pause lengths.
arg_voice = -1
arg_speed = 100
arg_volume = 0
arg_period_pause = -1
arg_comma_pause = -1
arg_line_pause = -1

# Arguments are read as (flag, value) pairs starting at sys.argv[1]; flags are
# matched case-insensitively, unknown flags are skipped together with their
# value, and a trailing flag without a value is ignored.
_loop = 1
while _loop <= len(sys.argv) - 2:
    arg = sys.argv[_loop].lower()
    try:
        if arg in ("-v", "-voice"):
            arg_voice = int(sys.argv[_loop + 1])
        elif arg in ("-s", "-speed"):
            arg_speed = int(sys.argv[_loop + 1])
        elif arg in ("-vol", "-volume"):
            arg_volume = float(sys.argv[_loop + 1])
        elif arg in ("-pp", "-period-pause"):
            arg_period_pause = float(sys.argv[_loop + 1])
        elif arg in ("-cp", "-comma-pause"):
            arg_comma_pause = float(sys.argv[_loop + 1])
        elif arg in ("-lp", "-line-pause"):
            arg_line_pause = float(sys.argv[_loop + 1])
    except ValueError:
        # Only a value that is not a valid number is reported and skipped
        # (the setting keeps its default); anything else, including Ctrl+C,
        # propagates instead of being swallowed.
        print("error > " + sys.argv[_loop] + " / " + sys.argv[_loop + 1])
    _loop += 2
67 |
# Restore the voice recorded by a previous run, when such a record exists.
if os.path.isfile("lastused.json"):
    with open('lastused.json') as last_used_file:
        lastUsed = json.load(last_used_file)

# Read the voice catalogue and append all of its entries to the shared list.
with open('data.json') as catalogue_file:
    js_data = json.load(catalogue_file)
data.extend(js_data)

# Order the catalogue with the cmp-style compare() function; the 1-based
# position in this sorted list is what the script uses as the voice id.
data.sort(key=functools.cmp_to_key(compare))
79 |
def _format_voice_row(voice_id, entry):
    """Return one listing row: right-aligned id, then padded language, gender, name and the first character of the voice type."""
    return (
        f"{voice_id:>3}: {entry['language']:<32}{entry['gender']:<8}"
        f"{entry['name']:<20}{entry['voiceType'][0]}"
    )


if arg_voice == -1:
    # No voice id was given on the command line: list every voice and ask.
    # The header uses the same column widths as the rows below it.
    print(f"{'ID':<5}{'LANGUAGE':<32}{'GENDER':<8}{'NAME':<20}TYPE")
    for voice_id, entry in enumerate(data, start=1):
        print(_format_voice_row(voice_id, entry))

    if lastUsed:
        # Offer the previously used voice as id 0.
        print("...........................................................")
        print(_format_voice_row(0, lastUsed))

    print("...........................................................")
    try:
        arg_voice = int(input("Choose voice: "))
    except ValueError:
        sys.exit("Invalid voice id: please enter a whole number.")

# Resolve the id to a voice entry. Ids are 1-based positions in the sorted
# list; 0 means "the voice used last time". Anything else is rejected here
# rather than failing later or silently indexing from the end of the list.
if arg_voice == 0:
    if not lastUsed:
        sys.exit("No previously used voice is stored; choose an id from 1 to " + str(len(data)) + ".")
    voice = lastUsed
elif 1 <= arg_voice <= len(data):
    voice = data[arg_voice - 1]
else:
    sys.exit("Invalid voice id " + str(arg_voice) + "; choose an id from 1 to " + str(len(data)) + ".")
104 |
def _ssml_break(seconds):
    """Return an SSML <break/> tag that pauses for the given number of seconds."""
    # The duration is written in whole milliseconds so the tag itself never
    # contains a "." or "," that one of the punctuation replacements below
    # could match.
    return '<break time="' + str(int(round(seconds * 1000))) + 'ms"/>'


# The text has to be loaded before any pause can be inserted into it.
print("Using Input File (input.txt)")
with open('input.txt', encoding='utf-8') as f:
    ttsText = f.read()

# A pause option is only applied when it was set on the command line
# (the defaults are -1). Each pause is inserted right after the character
# it belongs to.
if arg_period_pause >= 0:
    print("Setting Period Pause To " + str(arg_period_pause))
    ttsText = ttsText.replace(".", "." + _ssml_break(arg_period_pause))

if arg_comma_pause >= 0:
    print("Setting Comma Pause To " + str(arg_comma_pause))
    ttsText = ttsText.replace(",", "," + _ssml_break(arg_comma_pause))

if arg_line_pause >= 0:
    print("Setting Newline Pause To " + str(arg_line_pause))
    ttsText = ttsText.replace("\n", "\n" + _ssml_break(arg_line_pause))

# Split into chunks of at most 1500 characters, one request per chunk.
# textwrap.wrap also turns newlines and other whitespace into single spaces.
# NOTE(review): a chunk boundary can fall on the space inside an inserted
# <break .../> tag and split it across two chunks - confirm whether that
# matters for the TTS service.
ttsTextSplit = textwrap.wrap(ttsText, 1500)
ttsTextLen = len(ttsTextSplit)
124 |
# Record the selected voice so that a later run can load it from
# lastused.json and offer it again as id 0.
with open("lastused.json", "w") as last_used_file:
    json.dump(voice, last_used_file)
127 |
# Send every text chunk to the TTS endpoint and store the audio that comes
# back, one .mp3 file per chunk.
for i, chunk in enumerate(ttsTextSplit, start=1):
    progress = "[" + str(i) + "/" + str(ttsTextLen) + "]"

    # NOTE(review): the markup around the chunk was lost from this line in the
    # available source (the string literal was left unterminated). The usual
    # SSML wrapper is assumed here - confirm against the service's expectations.
    input_text = "<speak><p>" + chunk + "</p></speak>"
    params = {
        "globalSpeed": str(arg_speed) + "%",
        "globalVolume": ("+" if arg_volume >= 0 else "") + str(arg_volume) + "dB",
        "chunk": input_text,
        "narrationStyle": "regular",
        "platform": "landing_demo",
        "ssml": input_text,
        "userId": "5pe8l4FrdbczcoHOBkUtp0W37Gh2",
        "voice": voice["value"]
    }

    print("Sending request.. " + progress)
    req = requests.post("https://play.ht/api/transcribe", data=params)

    # Integer microsecond timestamp keeps names unique without a ".0" suffix.
    filename = "_" + voice["name"] + "-" + str(time.time_ns() // 1000) + "-" + str(i) + ".mp3"
    try:
        # Expected case: a JSON body whose "file" entry is the audio URL.
        response = json.loads(req.text)
        file_url = response["file"]

        head = requests.head(file_url)
        filesize = head.headers.get('content-length', -1)
        print("Getting file.. " + progress + " (JSON " + str(round(float(filesize) / 1024, 2)) + " KB)")

        audio = requests.get(file_url).content
    except (ValueError, KeyError, TypeError, requests.RequestException):
        # Body was not usable JSON (or the follow-up download failed):
        # assume the response itself is the audio file, as before.
        print("Getting file.. " + progress + " (MP3? " + str(round(len(req.content) / 1024, 2)) + " KB)")
        audio = req.content

    with open(filename, "wb") as f:
        f.write(audio)

    print("Saved to " + filename)
162 |
--------------------------------------------------------------------------------