├── .github
    └── workflows
    │   └── static.yml
├── .prettierrc.yml
├── LICENSE.md
├── README.md
└── src
    ├── index.html
    └── index.js


/.github/workflows/static.yml:
--------------------------------------------------------------------------------
 1 | # Simple workflow for deploying static content to GitHub Pages
 2 | name: Deploy static content to Pages
 3 | 
 4 | on:
 5 |   # Runs on pushes targeting the default branch
 6 |   push:
 7 |     branches: ["main"]
 8 | 
 9 |   # Allows you to run this workflow manually from the Actions tab
10 |   workflow_dispatch:
11 | 
12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
13 | permissions:
14 |   contents: read
15 |   pages: write
16 |   id-token: write
17 | 
18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
20 | concurrency:
21 |   group: "pages"
22 |   cancel-in-progress: false
23 | 
24 | jobs:
25 |   # Single deploy job since we're just deploying
26 |   deploy:
27 |     environment:
28 |       name: github-pages
29 |       url: ${{ steps.deployment.outputs.page_url }}
30 |     runs-on: ubuntu-latest
31 |     steps:
32 |       - name: Checkout
33 |         uses: actions/checkout@v4
34 |       - name: Setup Pages
35 |         uses: actions/configure-pages@v5
36 |       - name: Upload artifact
37 |         uses: actions/upload-pages-artifact@v3
38 |         with:
 39 |           # Upload only the src directory (it becomes the site root), not the entire repository
40 |           path: src
41 |       - name: Deploy to GitHub Pages
42 |         id: deployment
43 |         uses: actions/deploy-pages@v4
44 | 


--------------------------------------------------------------------------------
/.prettierrc.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LinqLover/simple-openai-tts-playground/5ec5c60f15c01bb8a5fabc7fbb728e747023ea4d/.prettierrc.yml


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # License
 2 | 
 3 | ## MIT License
 4 | 
 5 | Copyright © 2024 Christoph Thiede
 6 | 
 7 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 8 | 
 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
10 | 
11 | **The Software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort or otherwise, arising from,
12 | out of or in connection with the Software or the use or other dealings in the Software.**
13 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Simple OpenAI TTS Playground
 2 | 
 3 | > Try out the [OpenAI Text to Speech API](https://platform.openai.com/docs/api-reference/audio) in your browser.
 4 | 
 5 | Minimal yet working prototype. Feel free to propose features and contribute PRs!
 6 | 
 7 | **Visit the playground here: <https://linqlover.github.io/simple-openai-tts-playground>**
 8 | 
 9 | ## Current Features
10 | 
11 | - Select different voices and models
12 | - Automatic division of long texts
13 | - Price display
14 | - Caching of audio files (in local browser storage)
15 | 
16 | ## Development
17 | 
18 | To run the playground locally:
19 | 
20 | 1. Check out the repository
 21 | 2. Serve the `src` directory with a local web server, e.g. `cd src && python3 -m http.server` (or an equivalent Node.js static server)
 22 | 3. Open the playground at `http://localhost:8000/` (NOT `http://0.0.0.0:8000/`, because browsers might not treat that address as a secure context)
23 | 


--------------------------------------------------------------------------------
/src/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |   <head>
  4 |     <meta charset="UTF-8" />
  5 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  6 |     <meta
  7 |       name="description"
  8 |       content="A simple playground to try out the OpenAI Text to Speech API in your browser."
  9 |     />
 10 |     <title>Simple OpenAI TTS Playground</title>
 11 |     <link
 12 |       href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css"
 13 |       rel="stylesheet"
 14 |       integrity="sha384-QWTKZyjpPEjISv5WaRU9OFeRpok6YctnYmDr5pNlyT2bRjXh0JMhjY6hW+ALEwIH"
 15 |       crossorigin="anonymous"
 16 |     />
 17 |   </head>
 18 |   <body>
 19 |     <div class="col-lg-8 mx-auto p-4 py-md-5">
 20 |       <header class="d-flex align-items-center pb-3 mb-5 border-bottom">
 21 |         <a
 22 |           href="."
 23 |           class="d-flex align-items-center text-body-emphasis text-decoration-none"
 24 |         >
 25 |           <span class="fs-4">Simple OpenAI TTS Playground</span>
 26 |         </a>
 27 |       </header>
 28 | 
 29 |       <main>
 30 |         <h1 class="text-body-emphasis">Simple OpenAI TTS Playground</h1>
 31 |         <p class="fs-5 col-md-8">
 32 |           Try out the
 33 |           <a href="https://platform.openai.com/docs/api-reference/audio"
 34 |             >OpenAI Text to Speech API</a
 35 |           >
 36 |           in your browser.
 37 |         </p>
 38 |         <p>
 39 |           Minimal yet working prototype.
 40 |           <a href="https://github.com/LinqLover/simple-openai-tts-playground"
 41 |             >Visit this project on GitHub</a
 42 |           >
 43 |           and feel free to contribute. <strong>Disclaimer:</strong> This project
 44 |           is not affiliated with OpenAI.
 45 |         </p>
 46 |         <p>
 47 |           You can
 48 |           <a href="https://platform.openai.com/api-keys">create an API key</a>
 49 |           or <a href="https://platform.openai.com/usage">track your usage</a> in
 50 |           the OpenAI platform.
 51 |         </p>
 52 | 
 53 |         <div class="mb-5">
 54 |           <textarea
 55 |             id="textInput"
 56 |             class="form-control mb-3"
 57 |             rows="6"
 58 |             placeholder="Enter text here..."
 59 |           ></textarea>
 60 |           <select id="voiceSelect" class="form-control mb-3" title="Select Voice">
 61 |             <option value="alloy">Alloy</option>
 62 |             <option value="echo">Echo</option>
 63 |             <option value="fable">Fable</option>
 64 |             <option value="onyx">Onyx</option>
 65 |             <option value="nova">Nova</option>
 66 |             <option value="shimmer">Shimmer</option>
 67 |           </select>
 68 |           <select id="modelSelect" class="form-control mb-3" title="Select Model">
 69 |             <option value="tts-1">tts-1</option>
 70 |             <option value="tts-1-hd" selected>tts-1-hd</option>
 71 |             </select>
 72 |           <label for="speedInput" class="form-label" id="speedLabel">Speed</label>
 73 |           <input
 74 |             type="range"
 75 |             id="speedInput"
 76 |             class="form-range mb-3"
 77 |           />
 78 |           <input
 79 |             type="text"
 80 |             id="apiKeyInput"
 81 |             class="form-control mb-3"
 82 |             placeholder="Enter API Key"
 83 |           />
 84 |           <button id="convertBtn" class="btn btn-primary">
 85 |             Convert to Speech (¢0.00)
 86 |           </button>
 87 |           <audio
 88 |             id="audioPlayer"
 89 |             controls
 90 |             class="mt-3"
 91 |             style="width: 100%"
 92 |           ></audio>
 93 |         </div>
 94 |       </main>
 95 |     </div>
 96 | 
 97 |     <script src="index.js"></script>
 98 |   </body>
 99 | </html>
100 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
  1 | //#region Constants
// Base URL of the OpenAI REST API; endpoint paths such as `/audio/speech` are
// appended to it when building request URLs.
const OPENAPI_URL = "https://api.openai.com/v1";
// Price table keyed by model name. updatePricing() multiplies the entry by
// (text length / 1,000,000) and then by 100 to show cents, so each value is a
// price per one million input characters.
// NOTE(review): presumably USD and taken from OpenAI's price list -- confirm
// the figures are still current.
const PRICES_PER_MILLION = {
  "tts-1": 15.0,
  "tts-1-hd": 30.0,
};
  7 | //#endregion
  8 | 
  9 | //#region Utils
 10 | const sha256 = async (source) => {
 11 |   const sourceBytes = new TextEncoder().encode(source);
 12 |   const digest = await crypto.subtle.digest("SHA-256", sourceBytes);
 13 |   const resultBytes = [...new Uint8Array(digest)];
 14 |   return resultBytes.map((x) => x.toString(16).padStart(2, "0")).join("");
 15 | };
 16 | 
 17 | const blobToBase64 = (blob) => {
 18 |   const reader = new FileReader();
 19 |   reader.readAsDataURL(blob);
 20 |   return new Promise((resolve) => {
 21 |     reader.onloadend = () => {
 22 |       resolve(reader.result);
 23 |     };
 24 |   });
 25 | };
 26 | 
 27 | const base64ToBlob = (base64) => {
 28 |   const parts = base64.split(";base64,");
 29 |   const type = parts[0].split(":")[1];
 30 |   const byteCharacters = atob(parts[1]);
 31 |   const byteNumbers = new Array(byteCharacters.length);
 32 |   for (let i = 0; i < byteCharacters.length; i++) {
 33 |     byteNumbers[i] = byteCharacters.charCodeAt(i);
 34 |   }
 35 |   const byteArray = new Uint8Array(byteNumbers);
 36 |   return new Blob([byteArray], { type });
 37 | };
 38 | 
 39 | const delay = (ms) => {
 40 |   return new Promise((resolve) => setTimeout(resolve, ms));
 41 | };
 42 | //#endregion
 43 | 
 44 | const generateCacheKey = async (text, config, type = "audio") => {
 45 |   return `${type}-${config.model}-${config.voice}-${config.speed}-${await sha256(text)}`;
 46 | };
 47 | 
 48 | // Function to split the text into meaningful chunks
 49 | const splitText = (text) => {
 50 |   let chunks = [];
 51 |   const maxChunkSize = 4096; // Maximum characters per request
 52 |   const delimiters = [". ", "? ", "! ", "\n"]; // Sensible points to split the text
 53 | 
 54 |   while (text.length > 0) {
 55 |     if (text.length <= maxChunkSize) {
 56 |       chunks.push(text);
 57 |       break;
 58 |     }
 59 |     let end = maxChunkSize;
 60 |     for (let delimiter of delimiters) {
 61 |       let pos = text.lastIndexOf(delimiter, maxChunkSize);
 62 |       if (pos > -1) {
 63 |         end = pos + delimiter.length;
 64 |         break;
 65 |       }
 66 |     }
 67 |     chunks.push(text.substring(0, end));
 68 |     text = text.substring(end);
 69 |   }
 70 |   return chunks;
 71 | };
 72 | 
 73 | // Function to handle API requests and concatenating audio with rate limiting
 74 | const fetchAndConcatenateAudio = async (
 75 |   textChunks,
 76 |   config,
 77 |   progressFn = null
 78 | ) => {
 79 |   const rpm = 100; // Maximum requests per minute
 80 |   const interval = 60000 / rpm; // Time between requests in milliseconds
 81 | 
 82 |   let audioBlobs = [];
 83 |   progressFn(0);
 84 |   for (let i = 0; i < textChunks.length; i++) {
 85 |     if (progressFn) {
 86 |       progressFn(i / textChunks.length);
 87 |     }
 88 | 
 89 |     const chunk = textChunks[i];
 90 |     const cacheKey = await generateCacheKey(chunk, config, "chunk");
 91 | 
 92 |     let cachedBlob = null;
 93 | 
 94 |     let cachedBase64 = localStorage.getItem(cacheKey);
 95 |     if (cachedBase64) {
 96 |       cachedBlob = base64ToBlob(cachedBase64);
 97 |     }
 98 |     if (!cachedBlob) {
 99 |       if (i > 0 && i % rpm === 0) {
100 |         await delay(60000); // Wait for a minute after 100 requests
101 |       } else if (i > 0) {
102 |         await delay(interval); // Wait the required interval before the next request
103 |       }
104 | 
105 |       const response = await fetch(`${OPENAPI_URL}/audio/speech`, {
106 |         method: "POST",
107 |         headers: {
108 |           Authorization: `Bearer ${config.apiKey}`,
109 |           "Content-Type": "application/json",
110 |         },
111 |         body: JSON.stringify({
112 |           model: config.model,
113 |           input: chunk,
114 |           voice: config.voice,
115 |           speed: config.speed,
116 |         }),
117 |       });
118 | 
119 |       if (!response.ok) {
120 |         throw new Error(
121 |           "Failed to convert text to speech:\n\n" + (await response.text())
122 |         );
123 |       }
124 | 
125 |       const blob = await response.blob();
126 |       const base64 = await blobToBase64(blob);
127 |       try {
128 |         localStorage.setItem(cacheKey, base64); // Cache the new audio file
129 |       } catch (error) {
130 |         console.error("Failed to cache audio file:", error);
131 |       }
132 | 
133 |       audioBlobs.push(blob);
134 |     }
135 |   }
136 |   progressFn(1);
137 | 
138 |   return new Blob(audioBlobs, { type: "audio/mp3" });
139 | };
140 | 
/**
 * Click handler of the convert button: turns the entered text into speech
 * and loads the result into the audio player.
 *
 * A cached result for the same text, voice, model and speed is reused when
 * present. Otherwise the text is split into chunks and converted through the
 * API while the button is disabled and shows progress. On success the audio
 * and the API key are stored in localStorage (best effort); on failure the
 * error message is shown in an alert. The button label is refreshed via
 * updatePricing() after a conversion attempt.
 *
 * @returns {Promise<void>}
 */
const convert = async () => {
  const text = document.getElementById("textInput").value;
  const voice = document.getElementById("voiceSelect").value;
  const model = document.getElementById("modelSelect").value;
  const apiKey = document.getElementById("apiKeyInput").value;
  const speed = parseFloat(document.getElementById("speedInput").value);

  const player = document.getElementById("audioPlayer");
  // Point the player at a new blob and release the object URL it used before,
  // so repeated conversions do not keep every old audio blob alive.
  const showAudio = (blob) => {
    const previousUrl = player.src;
    player.src = URL.createObjectURL(blob);
    if (previousUrl.startsWith("blob:")) {
      URL.revokeObjectURL(previousUrl);
    }
  };

  const cacheKey = await generateCacheKey(text, { voice, model, speed });

  // Check cache first
  const cachedBase64 = localStorage.getItem(cacheKey);
  if (cachedBase64) {
    try {
      showAudio(base64ToBlob(cachedBase64));
      return;
    } catch (error) {
      // A corrupt entry falls through to a fresh conversion.
      console.error("Ignoring unreadable cached audio file:", error);
    }
  }

  const button = document.getElementById("convertBtn");
  let audioBlob = null;
  try {
    // gray out the button while processing
    button.disabled = true;
    button.innerText = "Converting...";

    const textChunks = splitText(text);
    audioBlob = await fetchAndConcatenateAudio(
      textChunks,
      { voice, model, apiKey, speed },
      (progress) => {
        button.innerText = `Converting... (${(progress * 100).toFixed(0)}%)`;
      }
    );
  } catch (error) {
    alert(error.message);
    await updatePricing();
    return;
  } finally {
    button.disabled = false;
  }

  showAudio(audioBlob);

  try {
    localStorage.setItem(cacheKey, await blobToBase64(audioBlob));
  } catch (error) {
    console.error("Failed to cache audio file:", error);
  }
  try {
    // setItem can throw too (e.g. when the audio cache has filled the quota);
    // that must not skip the label refresh below.
    localStorage.setItem("apiKey", apiKey);
  } catch (error) {
    console.error("Failed to store API key:", error);
  }

  await updatePricing(); // Restore the button label (price or "cached")
};
194 | 
/**
 * Refreshes the speed label and the convert button caption from the current
 * form values.
 *
 * The button reads "cached" when localStorage already holds audio for the
 * current text, model, voice and speed; otherwise it shows the estimated
 * cost in cents, derived from the text length and the per-million price of
 * the selected model.
 *
 * @returns {Promise<void>}
 */
const updatePricing = async () => {
  const valueOf = (id) => document.getElementById(id).value;
  const text = valueOf("textInput");
  const voice = valueOf("voiceSelect");
  const model = valueOf("modelSelect");
  const speed = valueOf("speedInput");

  // The speed label mirrors the slider position.
  document.getElementById("speedLabel").innerText = `Speed (${speed}x)`;

  const cacheKey = await generateCacheKey(text, { model, voice, speed });
  const button = document.getElementById("convertBtn");

  if (localStorage.getItem(cacheKey)) {
    button.innerText = "Convert to Speech (cached)";
  } else {
    const cents = (text.length / 1000000) * PRICES_PER_MILLION[model] * 100;
    button.innerText = `Convert to Speech (¢${cents.toFixed(2)})`;
  }
};
221 | 
/**
 * Wires up the page: restores the stored API key, registers the event
 * handlers, configures the speed slider and renders the initial button
 * caption.
 *
 * @returns {Promise<void>}
 */
const init = async () => {
  const byId = (id) => document.getElementById(id);

  // Restore the API key saved by a previous successful conversion.
  const storedApiKey = localStorage.getItem("apiKey");
  if (storedApiKey) {
    byId("apiKeyInput").value = storedApiKey;
  }

  // [element id, event name, handler]
  const listeners = [
    ["textInput", "input", updatePricing],
    ["voiceSelect", "change", updatePricing],
    ["modelSelect", "change", updatePricing],
    ["speedInput", "input", updatePricing],
    ["convertBtn", "click", convert],
  ];
  for (const [id, eventName, handler] of listeners) {
    byId(id).addEventListener(eventName, handler);
  }

  // Range limits are assigned before the value.
  const speedInput = byId("speedInput");
  speedInput.min = "0.25";
  speedInput.max = "4.0";
  speedInput.step = "0.05";
  speedInput.value = "1.0";

  await updatePricing();
};

init();
251 | 


--------------------------------------------------------------------------------