├── .github └── workflows │ └── static.yml ├── .prettierrc.yml ├── LICENSE.md ├── README.md └── src ├── index.html └── index.js /.github/workflows/static.yml: -------------------------------------------------------------------------------- 1 | # Simple workflow for deploying static content to GitHub Pages 2 | name: Deploy static content to Pages 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Single deploy job since we're just deploying 26 | deploy: 27 | environment: 28 | name: github-pages 29 | url: ${{ steps.deployment.outputs.page_url }} 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v4 34 | - name: Setup Pages 35 | uses: actions/configure-pages@v5 36 | - name: Upload artifact 37 | uses: actions/upload-pages-artifact@v3 38 | with: 39 | # Upload only the src directory (the static site) 40 | path: src 41 | - name: Deploy to GitHub Pages 42 | id: deployment 43 | uses: actions/deploy-pages@v4 44 | -------------------------------------------------------------------------------- /.prettierrc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LinqLover/simple-openai-tts-playground/5ec5c60f15c01bb8a5fabc7fbb728e747023ea4d/.prettierrc.yml -------------------------------------------------------------------------------- /LICENSE.md: 
-------------------------------------------------------------------------------- 1 | # License 2 | 3 | ## MIT License 4 | 5 | Copyright © 2024 Christoph Thiede 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 10 | 11 | **The Software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from, 12 | out of or in connection with the Software or the use or other dealings in the Software.** 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simple OpenAI TTS Playground 2 | 3 | > Try out the [OpenAI Text to Speech API](https://platform.openai.com/docs/api-reference/audio) in your browser. 4 | 5 | Minimal yet working prototype. Feel free to propose features and contribute PRs! 6 | 7 | **Visit the playground here: ** 8 | 9 | ## Current Features 10 | 11 | - Select different voices and models 12 | - Automatic division of long texts 13 | - Price display 14 | - Caching of audio files (in local browser storage) 15 | 16 | ## Development 17 | 18 | To run the playground locally: 19 | 20 | 1. Check out the repository 21 | 2. 
Host a web server from the `src` directory using `python3 -m http.server` (or an equivalent Node.js static server) 22 | 3. Open the playground at `http://localhost:8000/` (NOT `http://0.0.0.0:8000/`, because browsers might treat it as an insecure context) 23 | -------------------------------------------------------------------------------- /src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 10 | Simple OpenAI TTS Playground 11 | 17 | 18 | 19 |
20 |
21 | 25 | Simple OpenAI TTS Playground 26 | 27 |
28 | 29 |
30 |

Simple OpenAI TTS Playground

31 |

32 | Try out the 33 | OpenAI Text to Speech API 36 | in your browser. 37 |

38 |

39 | Minimal yet working prototype. 40 | Visit this project on GitHub 43 | and feel free to contribute. Disclaimer: This project 44 | is not affiliated with OpenAI. 45 |

46 |

47 | You can 48 | create an API key 49 | or track your usage in 50 | the OpenAI platform. 51 |

52 | 53 |
54 | 60 | 68 | 72 | 73 | 78 | 84 | 87 | 93 |
94 |
95 |
// File: /src/index.js

//#region Constants
const OPENAPI_URL = "https://api.openai.com/v1";
// Price per one million input characters, keyed by model name. The UI
// multiplies the resulting price by 100 and displays it in cents.
const PRICES_PER_MILLION = {
  "tts-1": 15.0,
  "tts-1-hd": 30.0,
};
// Maximum characters per request.
const MAX_CHUNK_SIZE = 4096;
// Sensible points to split the text.
const CHUNK_DELIMITERS = [". ", "? ", "! ", "\n"];
// Maximum requests per minute.
const REQUESTS_PER_MINUTE = 100;
//#endregion

//#region Utils
/**
 * Computes the SHA-256 digest of a string.
 * @param {string} source - Text to hash (encoded as UTF-8).
 * @returns {Promise<string>} Lowercase hexadecimal digest.
 */
const sha256 = async (source) => {
  const sourceBytes = new TextEncoder().encode(source);
  const digest = await crypto.subtle.digest("SHA-256", sourceBytes);
  const resultBytes = [...new Uint8Array(digest)];
  return resultBytes.map((x) => x.toString(16).padStart(2, "0")).join("");
};

/**
 * Encodes a blob as a `data:` URL string.
 * @param {Blob} blob - Blob to encode.
 * @returns {Promise<string>} Data URL, rejected with the reader's error when
 *   the blob cannot be read (so a failed read is never cached as "null").
 */
const blobToBase64 = (blob) => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Attach handlers before starting the read so no event can be missed.
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });
};

/**
 * Decodes a base64 `data:` URL (as produced by `blobToBase64`) into a blob.
 * @param {string} base64 - String of the form `data:<type>;base64,<payload>`.
 * @returns {Blob} Blob holding the decoded bytes with the embedded MIME type.
 * @throws If the string does not have that form or the payload is invalid.
 */
const base64ToBlob = (base64) => {
  const [header, payload] = base64.split(";base64,");
  const type = header.split(":")[1];
  const bytes = Uint8Array.from(atob(payload), (c) => c.charCodeAt(0));
  return new Blob([bytes], { type });
};

/**
 * Resolves after the given number of milliseconds.
 * @param {number} ms - Time to wait.
 * @returns {Promise<void>}
 */
const delay = (ms) => {
  return new Promise((resolve) => setTimeout(resolve, ms));
};

/**
 * Reads a cached audio blob from localStorage.
 * @param {string} cacheKey - Key produced by `generateCacheKey`.
 * @returns {Blob|null} The cached blob, or null when the entry is missing or
 *   cannot be decoded (a corrupt entry is treated as a cache miss).
 */
const readCachedBlob = (cacheKey) => {
  const cachedBase64 = localStorage.getItem(cacheKey);
  if (!cachedBase64) {
    return null;
  }
  try {
    return base64ToBlob(cachedBase64);
  } catch (error) {
    console.error("Ignoring unreadable cache entry:", error);
    return null;
  }
};

// Object URL currently assigned to the audio player, kept so that it can be
// released once it is replaced.
let currentAudioUrl = null;

/**
 * Points the audio player at the given blob and releases the previous URL.
 * @param {Blob} blob - Audio data to play.
 */
const showAudio = (blob) => {
  const url = URL.createObjectURL(blob);
  document.getElementById("audioPlayer").src = url;
  if (currentAudioUrl) {
    URL.revokeObjectURL(currentAudioUrl);
  }
  currentAudioUrl = url;
};
//#endregion

/**
 * Builds the localStorage key for a piece of text and a voice configuration.
 * @param {string} text - Text that is (or was) converted.
 * @param {{model: string, voice: string, speed: number}} config - Settings
 *   that influence the generated audio.
 * @param {string} [type="audio"] - Key namespace ("audio" for whole texts,
 *   "chunk" for single request chunks).
 * @returns {Promise<string>} Cache key.
 */
const generateCacheKey = async (text, config, type = "audio") => {
  return `${type}-${config.model}-${config.voice}-${config.speed}-${await sha256(text)}`;
};

/**
 * Splits text into chunks of at most MAX_CHUNK_SIZE characters.
 *
 * Each chunk ends at the latest delimiter that still fits completely into the
 * chunk. When no delimiter fits, the text is cut at the size limit, but never
 * in the middle of a surrogate pair. Joining the chunks yields the input.
 * @param {string} text - Text to split.
 * @returns {string[]} Chunks in order; empty for empty input.
 */
const splitText = (text) => {
  const chunks = [];
  let rest = text;

  while (rest.length > MAX_CHUNK_SIZE) {
    let end = 0;
    for (const delimiter of CHUNK_DELIMITERS) {
      // Only accept delimiters that END within the limit; searching from
      // MAX_CHUNK_SIZE itself would allow chunks of up to limit + 2 chars.
      const pos = rest.lastIndexOf(
        delimiter,
        MAX_CHUNK_SIZE - delimiter.length
      );
      if (pos > -1) {
        end = Math.max(end, pos + delimiter.length);
      }
    }
    if (end === 0) {
      end = MAX_CHUNK_SIZE;
      const lastUnit = rest.charCodeAt(end - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1; // Do not separate a high surrogate from its low surrogate
      }
    }
    chunks.push(rest.substring(0, end));
    rest = rest.substring(end);
  }
  if (rest.length > 0) {
    chunks.push(rest);
  }
  return chunks;
};

/**
 * Converts text chunks to speech and concatenates the audio in chunk order.
 *
 * Chunks found in the localStorage cache are reused; all others are requested
 * from the API with rate limiting and cached afterwards (best effort).
 * @param {string[]} textChunks - Chunks as produced by `splitText`.
 * @param {{model: string, voice: string, speed: number, apiKey: string}} config
 *   - Request settings.
 * @param {?function(number): void} [progressFn=null] - Optional callback that
 *   receives the progress as a fraction between 0 and 1.
 * @returns {Promise<Blob>} Concatenated audio of type "audio/mp3".
 * @throws {Error} If the API answers with a non-OK status.
 */
const fetchAndConcatenateAudio = async (
  textChunks,
  config,
  progressFn = null
) => {
  const interval = 60000 / REQUESTS_PER_MINUTE; // Time between requests in milliseconds
  const reportProgress = (fraction) => {
    if (progressFn) {
      progressFn(fraction);
    }
  };

  const audioBlobs = [];
  let requestCount = 0; // Requests actually sent; cached chunks do not count

  reportProgress(0);
  for (const [i, chunk] of textChunks.entries()) {
    reportProgress(i / textChunks.length);

    const cacheKey = await generateCacheKey(chunk, config, "chunk");
    const cachedBlob = readCachedBlob(cacheKey);
    if (cachedBlob) {
      // Cached chunks must be part of the result as well.
      audioBlobs.push(cachedBlob);
      continue;
    }

    if (requestCount > 0) {
      if (requestCount % REQUESTS_PER_MINUTE === 0) {
        await delay(60000); // Wait for a minute after a full batch of requests
      } else {
        await delay(interval); // Wait the required interval before the next request
      }
    }
    requestCount++;

    const response = await fetch(`${OPENAPI_URL}/audio/speech`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${config.apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: config.model,
        input: chunk,
        voice: config.voice,
        speed: config.speed,
      }),
    });

    if (!response.ok) {
      throw new Error(
        "Failed to convert text to speech:\n\n" + (await response.text())
      );
    }

    const blob = await response.blob();
    try {
      // Cache the new audio file; failures (e.g. exceeded storage quota) must
      // not discard audio that was already received.
      localStorage.setItem(cacheKey, await blobToBase64(blob));
    } catch (error) {
      console.error("Failed to cache audio file:", error);
    }

    audioBlobs.push(blob);
  }
  reportProgress(1);

  return new Blob(audioBlobs, { type: "audio/mp3" });
};

/**
 * Click handler of the convert button: reads the form, converts the text to
 * speech (or reuses the cached result) and loads it into the audio player.
 * On success the API key is remembered in localStorage.
 * @returns {Promise<void>}
 */
const convert = async () => {
  const text = document.getElementById("textInput").value;
  const voice = document.getElementById("voiceSelect").value;
  const model = document.getElementById("modelSelect").value;
  const apiKey = document.getElementById("apiKeyInput").value;
  const speed = parseFloat(document.getElementById("speedInput").value);

  const cacheKey = await generateCacheKey(text, { voice, model, speed });

  // Check cache first
  const cachedBlob = readCachedBlob(cacheKey);
  if (cachedBlob) {
    showAudio(cachedBlob);
    return;
  }

  const button = document.getElementById("convertBtn");
  let audioBlob = null;
  try {
    // gray out the button while processing
    button.disabled = true;
    button.innerText = "Converting...";

    const textChunks = splitText(text);
    audioBlob = await fetchAndConcatenateAudio(
      textChunks,
      { voice, model, apiKey, speed },
      (progress) => {
        button.innerText = `Converting... (${(progress * 100).toFixed(0)}%)`;
      }
    );
  } catch (error) {
    alert(error.message);
    await updatePricing(); // Restores the regular button label
    return;
  } finally {
    button.disabled = false;
  }

  showAudio(audioBlob);

  try {
    localStorage.setItem(cacheKey, await blobToBase64(audioBlob));
  } catch (error) {
    console.error("Failed to cache audio file:", error);
  }
  localStorage.setItem("apiKey", apiKey);

  await updatePricing(); // Restores the regular button label
};

/**
 * Refreshes the speed label and the convert button label, which shows either
 * that the current input is cached or the estimated price in cents.
 * @returns {Promise<void>}
 */
const updatePricing = async () => {
  const text = document.getElementById("textInput").value;
  const voice = document.getElementById("voiceSelect").value;
  const model = document.getElementById("modelSelect").value;
  const speedText = document.getElementById("speedInput").value;

  // Also update speed label with speed
  document.getElementById("speedLabel").innerText = `Speed (${speedText}x)`;

  // Parse the speed exactly like convert() does; otherwise "1.0" and 1 would
  // produce different cache keys and cached audio would not be recognized.
  const speed = parseFloat(speedText);
  const cacheKey = await generateCacheKey(text, { model, voice, speed });

  const button = document.getElementById("convertBtn");

  // Check cache first
  if (localStorage.getItem(cacheKey)) {
    button.innerText = "Convert to Speech (cached)";
    return;
  }

  const pricePerMillion = PRICES_PER_MILLION[model];
  if (pricePerMillion === undefined) {
    // No price known for this model; do not display "NaN".
    button.innerText = "Convert to Speech";
    return;
  }
  const price = (text.length / 1000000) * pricePerMillion;
  const cents = price * 100;
  button.innerText = `Convert to Speech (¢${cents.toFixed(2)})`;
};

/**
 * Restores the stored API key, wires up the event listeners, configures the
 * speed slider and renders the initial button label.
 * @returns {Promise<void>}
 */
const init = async () => {
  // Load the API key from cache
  const apiKey = localStorage.getItem("apiKey");
  if (apiKey) {
    document.getElementById("apiKeyInput").value = apiKey;
  }

  const speedInput = document.getElementById("speedInput");

  document.getElementById("textInput").addEventListener("input", updatePricing);
  document
    .getElementById("voiceSelect")
    .addEventListener("change", updatePricing);
  document
    .getElementById("modelSelect")
    .addEventListener("change", updatePricing);
  speedInput.addEventListener("input", updatePricing);
  document.getElementById("convertBtn").addEventListener("click", convert);

  speedInput.min = "0.25";
  speedInput.max = "4.0";
  speedInput.step = "0.05";
  speedInput.value = "1.0";
  await updatePricing();
};

// Only start the UI when a DOM is available, so that the functions above can
// also be loaded in non-browser environments (e.g. for tests).
if (typeof document !== "undefined") {
  init();
}