├── README.md ├── data.json ├── input.txt └── tts.py /README.md: -------------------------------------------------------------------------------- 1 | # TTS-Grabber 2 | A quick tool I made about a year ago to synthesize and download any text with any TTS voice; 829 voices are currently available. 3 | 4 | The program splits the input into chunks of at most 1500 characters and saves one audio file per chunk, so that no request hits a TTS provider's cutoff limit. 5 | 6 | ## Usage: 7 | Edit `input.txt` to change the text to synthesize. 8 | 9 | You can run just `tts.py` without any parameters to open the voice selector with default settings. 10 | 11 | #### Parameters 12 | ``` 13 | PARAMETER TYPE DESCRIPTION 14 | -h, -help --- Shows the help info. 15 | -v, -voice Int Sets the voice id to use. 16 | -s, -speed Int Sets the TTS voice speed (in percent). 17 | -vol, -volume Int Changes the TTS volume (in decibels). 18 | -pp, -period-pause Flt Sets how long the TTS should pause for at periods (in seconds). 19 | -cp, -comma-pause Flt Sets how long the TTS should pause for at commas (in seconds). 20 | -lp, -line-pause Flt Sets how long the TTS should pause for at newlines (in seconds). 21 | ``` 22 | 23 | Example with parameters: 24 | `tts.py -v 777 -s 100 -vol 0 -pp 1 -cp 0.5 -lp 2` 25 | 26 | ###### absolutely no api abuse here 27 | -------------------------------------------------------------------------------- /input.txt: -------------------------------------------------------------------------------- 1 | Here are SSML samples. 2 | I can pause . 3 | I can play a sound 4 | . 5 | I can speak in cardinals. Your number is 10. 6 | Or I can speak in ordinals. You are 10 in line. 7 | Or I can even speak in digits. The digits for ten are 10. 8 | I can also substitute phrases, like the W3C. 
"""TTS-Grabber (tts.py): synthesize ``input.txt`` with a play.ht voice.

The script reads the voice catalogue from ``data.json``, lets the user pick a
voice (or reuse the one stored in ``lastused.json``), splits ``input.txt``
into chunks of at most 1500 characters and saves one ``.mp3`` file per chunk.
"""

import functools
import json
import os.path
import sys
import textwrap
import time

HELP_TEXT = """TTS-Grabber (https://github.com/BleachDev/TTS-Grabber)

PARAMETER TYPE DESCRIPTION
-h, -help --- Shows the help info.
-v, -voice Int Sets the voice id to use.
-s, -speed Int Sets the TTS voice speed (in percent).
-vol, -volume Int Changes the TTS volume (in decibels).
-pp, -period-pause Flt Sets how long the TTS should pause for at periods (in seconds).
-cp, -comma-pause Flt Sets how long the TTS should pause for at commas (in seconds).
-lp, -line-pause Flt Sets how long the TTS should pause for at newlines (in seconds).

To see a list of the voices available, run the script without the -v parameter."""

SEPARATOR = "..........................................................."

# Maximum number of characters sent to the TTS service per request.
CHUNK_WIDTH = 1500

# Command-line flag -> (option name, converter for the flag's value).
_FLAGS = {
    "-v": ("voice", int),
    "-voice": ("voice", int),
    "-s": ("speed", int),
    "-speed": ("speed", int),
    "-vol": ("volume", float),
    "-volume": ("volume", float),
    "-pp": ("period_pause", float),
    "-period-pause": ("period_pause", float),
    "-cp": ("comma_pause", float),
    "-comma-pause": ("comma_pause", float),
    "-lp": ("line_pause", float),
    "-line-pause": ("line_pause", float),
}


def compare(dic1, dic2):
    """Order two voice entries by their language + gender + name string.

    Returns 1, -1 or 0, as expected by ``functools.cmp_to_key``.
    """
    s1 = dic1["language"] + dic1["gender"] + dic1["name"]
    s2 = dic2["language"] + dic2["gender"] + dic2["name"]
    return (s1 > s2) - (s1 < s2)


def parse_args(argv):
    """Parse ``flag value`` pairs from *argv* (``argv[0]`` is the script name).

    Returns a dict with the keys ``voice``, ``speed``, ``volume``,
    ``period_pause``, ``comma_pause`` and ``line_pause``.  A voice of -1 means
    "ask interactively"; a pause of -1 means "do not insert pauses".
    Unknown flags and a trailing flag without a value are ignored; a value
    that cannot be converted is reported and the default is kept.
    """
    options = {
        "voice": -1,
        "speed": 100,
        "volume": 0,
        "period_pause": -1,
        "comma_pause": -1,
        "line_pause": -1,
    }
    for flag, raw in zip(argv[1::2], argv[2::2]):
        target = _FLAGS.get(flag.lower())
        if target is None:
            continue
        name, convert = target
        try:
            options[name] = convert(raw)
        except ValueError:
            print("error > " + flag + " / " + raw)
    return options


def apply_pauses(text, period_pause=-1, comma_pause=-1, line_pause=-1):
    """Insert an SSML ``<break>`` after periods, commas and newlines.

    Each pause is a duration in seconds; a negative value disables that pause.
    The substitution is a single pass over *text*, so characters inside an
    inserted tag (e.g. the "." in ``0.5s``) are never substituted again.
    """
    table = {}
    for char, seconds in ((".", period_pause), (",", comma_pause), ("\n", line_pause)):
        if seconds >= 0:
            table[char] = char + '<break time="' + str(seconds) + 's"/>'
    return text.translate(str.maketrans(table))


def format_voice_row(index, voice):
    """Return one fixed-width line of the voice selection table."""
    return (
        f"{index:>3}: {voice['language']:<32}{voice['gender']:<8}"
        f"{voice['name']:<20}{voice['voiceType'][0]}"
    )


def load_voices(path="data.json"):
    """Load the voice catalogue from *path*, sorted with ``compare``."""
    with open(path, encoding="utf-8") as f:
        voices = list(json.load(f))
    voices.sort(key=functools.cmp_to_key(compare))
    return voices


def print_voice_menu(voices, last_used):
    """Print the numbered voice table, plus entry 0 for the last used voice."""
    print("ID LANGUAGE GENDER NAME TYPE")
    for number, voice in enumerate(voices, start=1):
        print(format_voice_row(number, voice))
    if last_used:
        print(SEPARATOR)
        print(format_voice_row(0, last_used))
    print(SEPARATOR)


def choose_voice(voices, last_used, voice_id):
    """Resolve *voice_id* to a voice entry (0 selects the last used voice).

    Exits with a message instead of failing later when 0 is requested without
    a stored voice, or when the id is outside ``1..len(voices)``.
    """
    if voice_id == 0:
        if not last_used:
            sys.exit("No last used voice is stored yet; choose a voice id instead.")
        return last_used
    if not 1 <= voice_id <= len(voices):
        sys.exit("Voice id must be between 0 and " + str(len(voices)) + ".")
    return voices[voice_id - 1]


def synthesize(chunks, voice, speed, volume):
    """Send every chunk to play.ht and save each result as an ``.mp3`` file."""
    # Imported here so the pure helpers above can be imported (and the help
    # text shown) without the third-party dependency being installed.
    import requests

    total = len(chunks)
    for number, chunk in enumerate(chunks, start=1):
        progress = "[" + str(number) + "/" + str(total) + "]"
        # NOTE(review): the wrapper markup of this literal was lost in the
        # copy of the file under review; a <speak><p>...</p></speak> SSML
        # document is assumed -- confirm against the upstream script.
        input_text = "<speak><p>" + chunk + "</p></speak>"
        params = {
            "globalSpeed": str(speed) + "%",
            "globalVolume": ("+" if volume >= 0 else "") + str(volume) + "dB",
            "chunk": input_text,
            "narrationStyle": "regular",
            "platform": "landing_demo",
            "ssml": input_text,
            "userId": "5pe8l4FrdbczcoHOBkUtp0W37Gh2",
            "voice": voice["value"],
        }

        print("Sending request.. " + progress)
        req = requests.post("https://play.ht/api/transcribe", data=params)

        filename = "_" + voice["name"] + "-" + str(time.time_ns() / 1000) + "-" + str(number) + ".mp3"
        try:
            file_url = json.loads(req.text)["file"]
        except (ValueError, KeyError, TypeError):
            # Not a JSON document pointing at a file: assume the response
            # body itself is the audio.
            size_kb = round(len(req.content) / 1024, 2)
            print("Getting file.. " + progress + " (MP3? " + str(size_kb) + " KB)")
            audio = req.content
        else:
            head = requests.head(file_url)
            filesize = head.headers.get("content-length", -1)
            size_kb = round(float(filesize) / 1024, 2)
            print("Getting file.. " + progress + " (JSON " + str(size_kb) + " KB)")
            audio = requests.get(file_url).content

        with open(filename, "wb") as f:
            f.write(audio)

        print("Saved to " + filename)


def main():
    """Run the command-line tool."""
    if len(sys.argv) == 2 and sys.argv[1].lower().startswith("-h"):
        print(HELP_TEXT)
        sys.exit()

    options = parse_args(sys.argv)

    last_used = {}
    if os.path.isfile("lastused.json"):
        with open("lastused.json", encoding="utf-8") as f:
            last_used = json.load(f)

    voices = load_voices()

    voice_id = options["voice"]
    if voice_id == -1:
        print_voice_menu(voices, last_used)
        try:
            voice_id = int(input("Choose voice: "))
        except ValueError:
            sys.exit("Voice id must be a whole number.")
    voice = choose_voice(voices, last_used, voice_id)

    for label, key in (
        ("Period", "period_pause"),
        ("Comma", "comma_pause"),
        ("Newline", "line_pause"),
    ):
        if options[key] >= 0:
            print("Setting " + label + " Pause To " + str(options[key]))

    print("Using Input File (input.txt)")
    with open("input.txt", encoding="utf-8") as f:
        text = f.read()

    # The pauses must be applied after the text has been read and before it
    # is split into chunks.
    text = apply_pauses(
        text,
        options["period_pause"],
        options["comma_pause"],
        options["line_pause"],
    )
    # NOTE: textwrap splits on whitespace, so a tag containing a space could
    # in principle be cut at a chunk boundary.
    chunks = textwrap.wrap(text, CHUNK_WIDTH)

    with open("lastused.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(voice))

    synthesize(chunks, voice, options["speed"], options["volume"])


if __name__ == "__main__":
    main()