├── .eslintrc.json ├── .gitignore ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── components.json ├── data ├── voices.11labs.json ├── voices.index.json └── voices.lmnt.json ├── example ├── example-1.png ├── example-2.png └── example-3.png ├── next.config.js ├── package-lock.json ├── package.json ├── pages ├── _app.tsx ├── _document.tsx ├── globals.css └── index.tsx ├── postcss.config.js ├── src ├── components │ └── ui │ │ ├── aspect-ratio.tsx │ │ ├── button.tsx │ │ ├── card.tsx │ │ └── skeleton.tsx ├── lib │ └── utils.ts └── storyteller │ ├── VoiceManager.ts │ ├── prepareVoices.script.ts │ ├── server.ts │ ├── storyTellerFlow.ts │ └── storytellerSchema.ts ├── tailwind.config.js └── tsconfig.json /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | 3 | stories 4 | runs 5 | 6 | # dependencies 7 | /node_modules 8 | /.pnp 9 | .pnp.js 10 | 11 | # testing 12 | /coverage 13 | 14 | # next.js 15 | /.next/ 16 | /out/ 17 | 18 | # production 19 | /build 20 | 21 | # misc 22 | .DS_Store 23 | *.pem 24 | 25 | # debug 26 | npm-debug.log* 27 | yarn-debug.log* 28 | yarn-error.log* 29 | 30 | # local env files 31 | .env*.local 32 | 33 | # vercel 34 | .vercel 35 | 36 | # typescript 37 | *.tsbuildinfo 38 | next-env.d.ts 39 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": ["Fastify", "modelfusion", "openai"] 3 | } 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Lars 
Grammel 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Storyteller 2 | 3 | StoryTeller is an experimental web application that creates short audio stories for pre-school kids. 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | ## Example Story 14 | 15 | https://github.com/lgrammel/storyteller/assets/205036/963df672-a4fc-4d0f-a74c-1e37c1246763 16 | 17 | ## How it works 18 | 19 | ![full](https://github.com/lgrammel/storyteller/assets/205036/c11ec999-0fae-4d69-8610-34932e75555f) 20 | 21 | The main flow from the diagram can be found in [generateStoryFlow](https://github.com/lgrammel/storyteller/blob/main/src/storyteller/storyTellerFlow.ts). Most of the UI is implemented in [index.tsx](https://github.com/lgrammel/storyteller/blob/main/pages/index.tsx). 22 | 23 | ## Development 24 | 25 | 1. Install dependencies: `npm i` 26 | 1. Set up `.env` with the required keys and base URL: 27 | 28 | ``` 29 | NEXT_PUBLIC_BASE_URL="http://localhost:3001" 30 | OPENAI_API_KEY="sk-..." 31 | ELEVENLABS_API_KEY="..." 32 | LMNT_API_KEY="..." 33 | STABILITY_API_KEY="sk-..." 34 | ``` 35 | 36 | You can get the API keys from the respective services: 37 | 38 | - [OpenAI](https://platform.openai.com/) 39 | - [Eleven Labs](https://elevenlabs.io/) 40 | - [Lmnt](https://lmnt.com/) 41 | - [Stability](https://platform.stability.ai/) 42 | 43 | 1. Start the Fastify server (runs on port 3001): `npm run fastify` 44 | 1. Start the Next.js server for the UI (runs on port 3000): `npm run next` 45 | 1. Go to [localhost:3000](http://localhost:3000) 46 | 47 | ## Deployment 48 | 49 | 1. Build the Next.js app: `npm run build`. This exports the app to the `out` folder, so it can be served by Fastify. 50 | 51 | ### Deploy to Render 52 | 53 | StoryTeller can be deployed as a [Render](https://render.com/) web service. 54 | 55 | - Build command: `npm i; npm run build` 56 | - Start command: `npm start` 57 | - Set the `NEXT_PUBLIC_BASE_URL` environment variable to the Render service URL (**the base URL must not have a trailing slash**) 58 | - Add the API keys to the Render Environment settings. 
59 | 60 | ## Used Libraries and AI services 61 | 62 | StoryTeller is built with the following libraries: 63 | 64 | - [ModelFusion](https://github.com/lgrammel/modelfusion) 65 | - [Fastify](https://www.fastify.io/) 66 | - [Next.js](https://nextjs.org/) 67 | - [shadcn/ui](https://ui.shadcn.com/) 68 | - [Zod](https://zod.dev/) 69 | 70 | The following AI APIs are used: 71 | 72 | - [OpenAI](https://platform.openai.com/) 73 | - [Eleven Labs](https://elevenlabs.io/) 74 | - [Lmnt](https://lmnt.com/) 75 | - [Stability](https://platform.stability.ai/) 76 | 77 | ## Limitations 78 | 79 | - No persistent execution with error handling 80 | - No user accounts / persistent data (only storage of image and audio files) 81 | - No user authentication / authorization 82 | - Limited error handling on the client 83 | - Tested on Chrome Desktop (Mac). Other browsers & operating systems may not work. 84 | -------------------------------------------------------------------------------- /components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.js", 8 | "css": "app/globals.css", 9 | "baseColor": "slate", 10 | "cssVariables": true 11 | }, 12 | "aliases": { 13 | "components": "@/components", 14 | "utils": "@/lib/utils" 15 | } 16 | } -------------------------------------------------------------------------------- /data/voices.11labs.json: -------------------------------------------------------------------------------- 1 | { 2 | "voices": [ 3 | { 4 | "voice_id": "21m00Tcm4TlvDq8ikWAM", 5 | "name": "Rachel", 6 | "samples": null, 7 | "category": "premade", 8 | "fine_tuning": { 9 | "language": null, 10 | "is_allowed_to_fine_tune": false, 11 | "fine_tuning_requested": false, 12 | "finetuning_state": "not_started", 13 | "verification_attempts": null, 14 | "verification_failures": [], 15 | 
"verification_attempts_count": 0, 16 | "slice_ids": null, 17 | "manual_verification": null, 18 | "manual_verification_requested": false 19 | }, 20 | "labels": { 21 | "accent": "american", 22 | "description": "calm", 23 | "age": "young", 24 | "gender": "female", 25 | "use case": "narration" 26 | }, 27 | "description": null, 28 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/21m00Tcm4TlvDq8ikWAM/df6788f9-5c96-470d-8312-aab3b3d8f50a.mp3", 29 | "available_for_tiers": [], 30 | "settings": null, 31 | "sharing": null, 32 | "high_quality_base_model_ids": [] 33 | }, 34 | { 35 | "voice_id": "2EiwWnXFnvU5JabPnv8n", 36 | "name": "Clyde", 37 | "samples": null, 38 | "category": "premade", 39 | "fine_tuning": { 40 | "language": null, 41 | "is_allowed_to_fine_tune": false, 42 | "fine_tuning_requested": false, 43 | "finetuning_state": "not_started", 44 | "verification_attempts": null, 45 | "verification_failures": [], 46 | "verification_attempts_count": 0, 47 | "slice_ids": null, 48 | "manual_verification": null, 49 | "manual_verification_requested": false 50 | }, 51 | "labels": { 52 | "accent": "american", 53 | "description": "war veteran", 54 | "age": "middle aged", 55 | "gender": "male", 56 | "use case": "video games" 57 | }, 58 | "description": null, 59 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/2EiwWnXFnvU5JabPnv8n/65d80f52-703f-4cae-a91d-75d4e200ed02.mp3", 60 | "available_for_tiers": [], 61 | "settings": null, 62 | "sharing": null, 63 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 64 | }, 65 | { 66 | "voice_id": "AZnzlk1XvdvUeBnXmlld", 67 | "name": "Domi", 68 | "samples": null, 69 | "category": "premade", 70 | "fine_tuning": { 71 | "language": null, 72 | "is_allowed_to_fine_tune": false, 73 | "fine_tuning_requested": false, 74 | "finetuning_state": "not_started", 75 | "verification_attempts": null, 76 | "verification_failures": [], 77 | "verification_attempts_count": 0, 78 | "slice_ids": 
null, 79 | "manual_verification": null, 80 | "manual_verification_requested": false 81 | }, 82 | "labels": { 83 | "accent": "american", 84 | "description": "strong", 85 | "age": "young", 86 | "gender": "female", 87 | "use case": "narration" 88 | }, 89 | "description": null, 90 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/AZnzlk1XvdvUeBnXmlld/508e12d0-a7f7-4d86-a0d3-f3884ff353ed.mp3", 91 | "available_for_tiers": [], 92 | "settings": null, 93 | "sharing": null, 94 | "high_quality_base_model_ids": [] 95 | }, 96 | { 97 | "voice_id": "CYw3kZ02Hs0563khs1Fj", 98 | "name": "Dave", 99 | "samples": null, 100 | "category": "premade", 101 | "fine_tuning": { 102 | "language": null, 103 | "is_allowed_to_fine_tune": false, 104 | "fine_tuning_requested": false, 105 | "finetuning_state": "not_started", 106 | "verification_attempts": null, 107 | "verification_failures": [], 108 | "verification_attempts_count": 0, 109 | "slice_ids": null, 110 | "manual_verification": null, 111 | "manual_verification_requested": false 112 | }, 113 | "labels": { 114 | "accent": "british-essex", 115 | "description": "conversational", 116 | "age": "young", 117 | "gender": "male", 118 | "use case": "video games" 119 | }, 120 | "description": null, 121 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/CYw3kZ02Hs0563khs1Fj/872cb056-45d3-419e-b5c6-de2b387a93a0.mp3", 122 | "available_for_tiers": [], 123 | "settings": null, 124 | "sharing": null, 125 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 126 | }, 127 | { 128 | "voice_id": "D38z5RcWu1voky8WS1ja", 129 | "name": "Fin", 130 | "samples": null, 131 | "category": "premade", 132 | "fine_tuning": { 133 | "language": null, 134 | "is_allowed_to_fine_tune": false, 135 | "fine_tuning_requested": false, 136 | "finetuning_state": "not_started", 137 | "verification_attempts": null, 138 | "verification_failures": [], 139 | "verification_attempts_count": 0, 140 | "slice_ids": null, 141 
| "manual_verification": null, 142 | "manual_verification_requested": false 143 | }, 144 | "labels": { 145 | "accent": "irish", 146 | "description": "sailor", 147 | "age": "old", 148 | "gender": "male", 149 | "use case": "video games" 150 | }, 151 | "description": null, 152 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/D38z5RcWu1voky8WS1ja/a470ba64-1e72-46d9-ba9d-030c4155e2d2.mp3", 153 | "available_for_tiers": [], 154 | "settings": null, 155 | "sharing": null, 156 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 157 | }, 158 | { 159 | "voice_id": "EXAVITQu4vr4xnSDxMaL", 160 | "name": "Bella", 161 | "samples": null, 162 | "category": "premade", 163 | "fine_tuning": { 164 | "language": null, 165 | "is_allowed_to_fine_tune": false, 166 | "fine_tuning_requested": false, 167 | "finetuning_state": "not_started", 168 | "verification_attempts": null, 169 | "verification_failures": [], 170 | "verification_attempts_count": 0, 171 | "slice_ids": null, 172 | "manual_verification": null, 173 | "manual_verification_requested": false 174 | }, 175 | "labels": { 176 | "accent": "american", 177 | "description": "soft", 178 | "age": "young", 179 | "gender": "female", 180 | "use case": "narration" 181 | }, 182 | "description": null, 183 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/EXAVITQu4vr4xnSDxMaL/941b779e-c2ad-48d4-bddb-28d1a68fa27e.mp3", 184 | "available_for_tiers": [], 185 | "settings": null, 186 | "sharing": null, 187 | "high_quality_base_model_ids": [] 188 | }, 189 | { 190 | "voice_id": "ErXwobaYiN019PkySvjV", 191 | "name": "Antoni", 192 | "samples": null, 193 | "category": "premade", 194 | "fine_tuning": { 195 | "language": null, 196 | "is_allowed_to_fine_tune": false, 197 | "fine_tuning_requested": false, 198 | "finetuning_state": "not_started", 199 | "verification_attempts": null, 200 | "verification_failures": [], 201 | "verification_attempts_count": 0, 202 | "slice_ids": null, 203 | 
"manual_verification": null, 204 | "manual_verification_requested": false 205 | }, 206 | "labels": { 207 | "accent": "american", 208 | "description": "well-rounded", 209 | "age": "young", 210 | "gender": "male", 211 | "use case": "narration" 212 | }, 213 | "description": null, 214 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/ErXwobaYiN019PkySvjV/ee9ac367-91ee-4a56-818a-2bd1a9dbe83a.mp3", 215 | "available_for_tiers": [], 216 | "settings": null, 217 | "sharing": null, 218 | "high_quality_base_model_ids": [] 219 | }, 220 | { 221 | "voice_id": "GBv7mTt0atIp3Br8iCZE", 222 | "name": "Thomas", 223 | "samples": null, 224 | "category": "premade", 225 | "fine_tuning": { 226 | "language": null, 227 | "is_allowed_to_fine_tune": false, 228 | "fine_tuning_requested": false, 229 | "finetuning_state": "not_started", 230 | "verification_attempts": null, 231 | "verification_failures": [], 232 | "verification_attempts_count": 0, 233 | "slice_ids": null, 234 | "manual_verification": null, 235 | "manual_verification_requested": false 236 | }, 237 | "labels": { 238 | "accent": "american", 239 | "description": "calm", 240 | "age": "young", 241 | "gender": "male", 242 | "use case": "meditation" 243 | }, 244 | "description": null, 245 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/GBv7mTt0atIp3Br8iCZE/98542988-5267-4148-9a9e-baa8c4f14644.mp3", 246 | "available_for_tiers": [], 247 | "settings": null, 248 | "sharing": null, 249 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 250 | }, 251 | { 252 | "voice_id": "IKne3meq5aSn9XLyUdCD", 253 | "name": "Charlie", 254 | "samples": null, 255 | "category": "premade", 256 | "fine_tuning": { 257 | "language": null, 258 | "is_allowed_to_fine_tune": false, 259 | "fine_tuning_requested": false, 260 | "finetuning_state": "not_started", 261 | "verification_attempts": null, 262 | "verification_failures": [], 263 | "verification_attempts_count": 0, 264 | "slice_ids": null, 
265 | "manual_verification": null, 266 | "manual_verification_requested": false 267 | }, 268 | "labels": { 269 | "accent": "australian", 270 | "description": "casual", 271 | "age": "middle aged", 272 | "gender": "male", 273 | "use case": "conversational" 274 | }, 275 | "description": null, 276 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/IKne3meq5aSn9XLyUdCD/102de6f2-22ed-43e0-a1f1-111fa75c5481.mp3", 277 | "available_for_tiers": [], 278 | "settings": null, 279 | "sharing": null, 280 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 281 | }, 282 | { 283 | "voice_id": "LcfcDJNUP1GQjkzn1xUU", 284 | "name": "Emily", 285 | "samples": null, 286 | "category": "premade", 287 | "fine_tuning": { 288 | "language": null, 289 | "is_allowed_to_fine_tune": false, 290 | "fine_tuning_requested": false, 291 | "finetuning_state": "not_started", 292 | "verification_attempts": null, 293 | "verification_failures": [], 294 | "verification_attempts_count": 0, 295 | "slice_ids": null, 296 | "manual_verification": null, 297 | "manual_verification_requested": false 298 | }, 299 | "labels": { 300 | "accent": "american", 301 | "description": "calm", 302 | "age": "young", 303 | "gender": "female", 304 | "use case": "meditation" 305 | }, 306 | "description": null, 307 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/LcfcDJNUP1GQjkzn1xUU/e4b994b7-9713-4238-84f3-add8fccaaccd.mp3", 308 | "available_for_tiers": [], 309 | "settings": null, 310 | "sharing": null, 311 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 312 | }, 313 | { 314 | "voice_id": "MF3mGyEYCl7XYWbV9V6O", 315 | "name": "Elli", 316 | "samples": null, 317 | "category": "premade", 318 | "fine_tuning": { 319 | "language": null, 320 | "is_allowed_to_fine_tune": false, 321 | "fine_tuning_requested": false, 322 | "finetuning_state": "not_started", 323 | "verification_attempts": null, 324 | "verification_failures": [], 325 | 
"verification_attempts_count": 0, 326 | "slice_ids": null, 327 | "manual_verification": null, 328 | "manual_verification_requested": false 329 | }, 330 | "labels": { 331 | "accent": "american", 332 | "description": "emotional", 333 | "age": "young", 334 | "gender": "female", 335 | "use case": "narration" 336 | }, 337 | "description": null, 338 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/MF3mGyEYCl7XYWbV9V6O/d8ecadea-9e48-4e5d-868a-2ec3d7397861.mp3", 339 | "available_for_tiers": [], 340 | "settings": null, 341 | "sharing": null, 342 | "high_quality_base_model_ids": [] 343 | }, 344 | { 345 | "voice_id": "N2lVS1w4EtoT3dr4eOWO", 346 | "name": "Callum", 347 | "samples": null, 348 | "category": "premade", 349 | "fine_tuning": { 350 | "language": null, 351 | "is_allowed_to_fine_tune": false, 352 | "fine_tuning_requested": false, 353 | "finetuning_state": "not_started", 354 | "verification_attempts": null, 355 | "verification_failures": [], 356 | "verification_attempts_count": 0, 357 | "slice_ids": null, 358 | "manual_verification": null, 359 | "manual_verification_requested": false 360 | }, 361 | "labels": { 362 | "accent": "american", 363 | "description": "hoarse", 364 | "age": "middle aged", 365 | "gender": "male", 366 | "use case": "video games" 367 | }, 368 | "description": null, 369 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/N2lVS1w4EtoT3dr4eOWO/ac833bd8-ffda-4938-9ebc-b0f99ca25481.mp3", 370 | "available_for_tiers": [], 371 | "settings": null, 372 | "sharing": null, 373 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 374 | }, 375 | { 376 | "voice_id": "ODq5zmih8GrVes37Dizd", 377 | "name": "Patrick", 378 | "samples": null, 379 | "category": "premade", 380 | "fine_tuning": { 381 | "language": null, 382 | "is_allowed_to_fine_tune": false, 383 | "fine_tuning_requested": false, 384 | "finetuning_state": "not_started", 385 | "verification_attempts": null, 386 | 
"verification_failures": [], 387 | "verification_attempts_count": 0, 388 | "slice_ids": null, 389 | "manual_verification": null, 390 | "manual_verification_requested": false 391 | }, 392 | "labels": { 393 | "accent": "american", 394 | "description": "shouty", 395 | "age": "middle aged", 396 | "gender": "male", 397 | "use case": "video games" 398 | }, 399 | "description": null, 400 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/ODq5zmih8GrVes37Dizd/0ebec87a-2569-4976-9ea5-0170854411a9.mp3", 401 | "available_for_tiers": [], 402 | "settings": null, 403 | "sharing": null, 404 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 405 | }, 406 | { 407 | "voice_id": "SOYHLrjzK2X1ezoPC6cr", 408 | "name": "Harry", 409 | "samples": null, 410 | "category": "premade", 411 | "fine_tuning": { 412 | "language": null, 413 | "is_allowed_to_fine_tune": false, 414 | "fine_tuning_requested": false, 415 | "finetuning_state": "not_started", 416 | "verification_attempts": null, 417 | "verification_failures": [], 418 | "verification_attempts_count": 0, 419 | "slice_ids": null, 420 | "manual_verification": null, 421 | "manual_verification_requested": false 422 | }, 423 | "labels": { 424 | "accent": "american", 425 | "description": "anxious", 426 | "age": "young", 427 | "gender": "male", 428 | "use case": "video games" 429 | }, 430 | "description": null, 431 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/SOYHLrjzK2X1ezoPC6cr/86d178f6-f4b6-4e0e-85be-3de19f490794.mp3", 432 | "available_for_tiers": [], 433 | "settings": null, 434 | "sharing": null, 435 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 436 | }, 437 | { 438 | "voice_id": "TX3LPaxmHKxFdv7VOQHJ", 439 | "name": "Liam", 440 | "samples": null, 441 | "category": "premade", 442 | "fine_tuning": { 443 | "language": null, 444 | "is_allowed_to_fine_tune": false, 445 | "fine_tuning_requested": false, 446 | "finetuning_state": "not_started", 447 | 
"verification_attempts": null, 448 | "verification_failures": [], 449 | "verification_attempts_count": 0, 450 | "slice_ids": null, 451 | "manual_verification": null, 452 | "manual_verification_requested": false 453 | }, 454 | "labels": { 455 | "accent": "american", 456 | "age": "young", 457 | "gender": "male", 458 | "use case": "narration", 459 | "description ": "neutral" 460 | }, 461 | "description": null, 462 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/TX3LPaxmHKxFdv7VOQHJ/63148076-6363-42db-aea8-31424308b92c.mp3", 463 | "available_for_tiers": [], 464 | "settings": null, 465 | "sharing": null, 466 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 467 | }, 468 | { 469 | "voice_id": "ThT5KcBeYPX3keUQqHPh", 470 | "name": "Dorothy", 471 | "samples": null, 472 | "category": "premade", 473 | "fine_tuning": { 474 | "language": null, 475 | "is_allowed_to_fine_tune": false, 476 | "fine_tuning_requested": false, 477 | "finetuning_state": "not_started", 478 | "verification_attempts": null, 479 | "verification_failures": [], 480 | "verification_attempts_count": 0, 481 | "slice_ids": null, 482 | "manual_verification": null, 483 | "manual_verification_requested": false 484 | }, 485 | "labels": { 486 | "accent": "british", 487 | "description": "pleasant", 488 | "age": "young", 489 | "gender": "female", 490 | "use case": "children's stories" 491 | }, 492 | "description": null, 493 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/ThT5KcBeYPX3keUQqHPh/981f0855-6598-48d2-9f8f-b6d92fbbe3fc.mp3", 494 | "available_for_tiers": [], 495 | "settings": null, 496 | "sharing": null, 497 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 498 | }, 499 | { 500 | "voice_id": "TxGEqnHWrfWFTfGW9XjX", 501 | "name": "Josh", 502 | "samples": null, 503 | "category": "premade", 504 | "fine_tuning": { 505 | "language": null, 506 | "is_allowed_to_fine_tune": false, 507 | "fine_tuning_requested": false, 508 | 
"finetuning_state": "not_started", 509 | "verification_attempts": null, 510 | "verification_failures": [], 511 | "verification_attempts_count": 0, 512 | "slice_ids": null, 513 | "manual_verification": null, 514 | "manual_verification_requested": false 515 | }, 516 | "labels": { 517 | "accent": "american", 518 | "description": "deep", 519 | "age": "young", 520 | "gender": "male", 521 | "use case": "narration" 522 | }, 523 | "description": null, 524 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/TxGEqnHWrfWFTfGW9XjX/3ae2fc71-d5f9-4769-bb71-2a43633cd186.mp3", 525 | "available_for_tiers": [], 526 | "settings": null, 527 | "sharing": null, 528 | "high_quality_base_model_ids": [] 529 | }, 530 | { 531 | "voice_id": "VR6AewLTigWG4xSOukaG", 532 | "name": "Arnold", 533 | "samples": null, 534 | "category": "premade", 535 | "fine_tuning": { 536 | "language": null, 537 | "is_allowed_to_fine_tune": false, 538 | "fine_tuning_requested": false, 539 | "finetuning_state": "not_started", 540 | "verification_attempts": null, 541 | "verification_failures": [], 542 | "verification_attempts_count": 0, 543 | "slice_ids": null, 544 | "manual_verification": null, 545 | "manual_verification_requested": false 546 | }, 547 | "labels": { 548 | "accent": "american", 549 | "description": "crisp", 550 | "age": "middle aged", 551 | "gender": "male", 552 | "use case": "narration" 553 | }, 554 | "description": null, 555 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/VR6AewLTigWG4xSOukaG/316050b7-c4e0-48de-acf9-a882bb7fc43b.mp3", 556 | "available_for_tiers": [], 557 | "settings": null, 558 | "sharing": null, 559 | "high_quality_base_model_ids": [] 560 | }, 561 | { 562 | "voice_id": "XB0fDUnXU5powFXDhCwa", 563 | "name": "Charlotte", 564 | "samples": null, 565 | "category": "premade", 566 | "fine_tuning": { 567 | "language": null, 568 | "is_allowed_to_fine_tune": false, 569 | "fine_tuning_requested": false, 570 | "finetuning_state": 
"not_started", 571 | "verification_attempts": null, 572 | "verification_failures": [], 573 | "verification_attempts_count": 0, 574 | "slice_ids": null, 575 | "manual_verification": null, 576 | "manual_verification_requested": false 577 | }, 578 | "labels": { 579 | "accent": "english-swedish", 580 | "description": "seductive", 581 | "age": "middle aged", 582 | "gender": "female", 583 | "use case": "video games" 584 | }, 585 | "description": null, 586 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/XB0fDUnXU5powFXDhCwa/942356dc-f10d-4d89-bda5-4f8505ee038b.mp3", 587 | "available_for_tiers": [], 588 | "settings": null, 589 | "sharing": null, 590 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 591 | }, 592 | { 593 | "voice_id": "XrExE9yKIg1WjnnlVkGX", 594 | "name": "Matilda", 595 | "samples": null, 596 | "category": "premade", 597 | "fine_tuning": { 598 | "language": null, 599 | "is_allowed_to_fine_tune": false, 600 | "fine_tuning_requested": false, 601 | "finetuning_state": "not_started", 602 | "verification_attempts": null, 603 | "verification_failures": [], 604 | "verification_attempts_count": 0, 605 | "slice_ids": null, 606 | "manual_verification": null, 607 | "manual_verification_requested": false 608 | }, 609 | "labels": { 610 | "accent": "american", 611 | "description": "warm", 612 | "age": "young", 613 | "gender": "female", 614 | "use case": "audiobook" 615 | }, 616 | "description": null, 617 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/XrExE9yKIg1WjnnlVkGX/b930e18d-6b4d-466e-bab2-0ae97c6d8535.mp3", 618 | "available_for_tiers": [], 619 | "settings": null, 620 | "sharing": null, 621 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 622 | }, 623 | { 624 | "voice_id": "Yko7PKHZNXotIFUBG7I9", 625 | "name": "Matthew", 626 | "samples": null, 627 | "category": "premade", 628 | "fine_tuning": { 629 | "language": null, 630 | "is_allowed_to_fine_tune": false, 631 | 
"fine_tuning_requested": false, 632 | "finetuning_state": "not_started", 633 | "verification_attempts": null, 634 | "verification_failures": [], 635 | "verification_attempts_count": 0, 636 | "slice_ids": null, 637 | "manual_verification": null, 638 | "manual_verification_requested": false 639 | }, 640 | "labels": { 641 | "accent": "british", 642 | "age": "middle aged", 643 | "gender": "male", 644 | "use case": "audiobook", 645 | "description ": "calm" 646 | }, 647 | "description": null, 648 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/Yko7PKHZNXotIFUBG7I9/02c66c93-a237-436f-8a7d-43e8c49bc6a3.mp3", 649 | "available_for_tiers": [], 650 | "settings": null, 651 | "sharing": null, 652 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 653 | }, 654 | { 655 | "voice_id": "ZQe5CZNOzWyzPSCn5a3c", 656 | "name": "James", 657 | "samples": null, 658 | "category": "premade", 659 | "fine_tuning": { 660 | "language": null, 661 | "is_allowed_to_fine_tune": false, 662 | "fine_tuning_requested": false, 663 | "finetuning_state": "not_started", 664 | "verification_attempts": null, 665 | "verification_failures": [], 666 | "verification_attempts_count": 0, 667 | "slice_ids": null, 668 | "manual_verification": null, 669 | "manual_verification_requested": false 670 | }, 671 | "labels": { 672 | "accent": "australian", 673 | "description": "calm ", 674 | "age": "old", 675 | "gender": "male", 676 | "use case": "news" 677 | }, 678 | "description": null, 679 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/ZQe5CZNOzWyzPSCn5a3c/35734112-7b72-48df-bc2f-64d5ab2f791b.mp3", 680 | "available_for_tiers": [], 681 | "settings": null, 682 | "sharing": null, 683 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 684 | }, 685 | { 686 | "voice_id": "Zlb1dXrM653N07WRdFW3", 687 | "name": "Joseph", 688 | "samples": null, 689 | "category": "premade", 690 | "fine_tuning": { 691 | "language": null, 692 | 
"is_allowed_to_fine_tune": false, 693 | "fine_tuning_requested": false, 694 | "finetuning_state": "not_started", 695 | "verification_attempts": null, 696 | "verification_failures": [], 697 | "verification_attempts_count": 0, 698 | "slice_ids": null, 699 | "manual_verification": null, 700 | "manual_verification_requested": false 701 | }, 702 | "labels": { 703 | "accent": "british", 704 | "age": "middle aged", 705 | "gender": "male", 706 | "use case": "news", 707 | "description ": "ground reporter " 708 | }, 709 | "description": null, 710 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/Zlb1dXrM653N07WRdFW3/daa22039-8b09-4c65-b59f-c79c48646a72.mp3", 711 | "available_for_tiers": [], 712 | "settings": null, 713 | "sharing": null, 714 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 715 | }, 716 | { 717 | "voice_id": "bVMeCyTHy58xNoL34h3p", 718 | "name": "Jeremy", 719 | "samples": null, 720 | "category": "premade", 721 | "fine_tuning": { 722 | "language": null, 723 | "is_allowed_to_fine_tune": false, 724 | "fine_tuning_requested": false, 725 | "finetuning_state": "not_started", 726 | "verification_attempts": null, 727 | "verification_failures": [], 728 | "verification_attempts_count": 0, 729 | "slice_ids": null, 730 | "manual_verification": null, 731 | "manual_verification_requested": false 732 | }, 733 | "labels": { 734 | "accent": "american-irish", 735 | "description": "excited", 736 | "age": "young", 737 | "gender": "male", 738 | "use case": "narration" 739 | }, 740 | "description": null, 741 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/bVMeCyTHy58xNoL34h3p/66c47d58-26fd-4b30-8a06-07952116a72c.mp3", 742 | "available_for_tiers": [], 743 | "settings": null, 744 | "sharing": null, 745 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 746 | }, 747 | { 748 | "voice_id": "flq6f7yk4E4fJM5XTYuZ", 749 | "name": "Michael", 750 | "samples": null, 751 | "category": "premade", 752 | 
"fine_tuning": { 753 | "language": null, 754 | "is_allowed_to_fine_tune": false, 755 | "fine_tuning_requested": false, 756 | "finetuning_state": "not_started", 757 | "verification_attempts": null, 758 | "verification_failures": [], 759 | "verification_attempts_count": 0, 760 | "slice_ids": null, 761 | "manual_verification": null, 762 | "manual_verification_requested": false 763 | }, 764 | "labels": { 765 | "accent": "american", 766 | "age": "old", 767 | "gender": "male", 768 | "use case": "audiobook", 769 | "description ": "orotund" 770 | }, 771 | "description": null, 772 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/flq6f7yk4E4fJM5XTYuZ/c6431a82-f7d2-4905-b8a4-a631960633d6.mp3", 773 | "available_for_tiers": [], 774 | "settings": null, 775 | "sharing": null, 776 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 777 | }, 778 | { 779 | "voice_id": "g5CIjZEefAph4nQFvHAz", 780 | "name": "Ethan", 781 | "samples": null, 782 | "category": "premade", 783 | "fine_tuning": { 784 | "language": null, 785 | "is_allowed_to_fine_tune": false, 786 | "fine_tuning_requested": false, 787 | "finetuning_state": "not_started", 788 | "verification_attempts": null, 789 | "verification_failures": [], 790 | "verification_attempts_count": 0, 791 | "slice_ids": null, 792 | "manual_verification": null, 793 | "manual_verification_requested": false 794 | }, 795 | "labels": { 796 | "accent": "american", 797 | "age": "young", 798 | "gender": "male", 799 | "use case": "ASMR", 800 | "description ": "whisper" 801 | }, 802 | "description": null, 803 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/g5CIjZEefAph4nQFvHAz/26acfa99-fdec-43b8-b2ee-e49e75a3ac16.mp3", 804 | "available_for_tiers": [], 805 | "settings": null, 806 | "sharing": null, 807 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 808 | }, 809 | { 810 | "voice_id": "jBpfuIE2acCO8z3wKNLl", 811 | "name": "Gigi", 812 | "samples": null, 813 | "category": 
"premade", 814 | "fine_tuning": { 815 | "language": null, 816 | "is_allowed_to_fine_tune": false, 817 | "fine_tuning_requested": false, 818 | "finetuning_state": "not_started", 819 | "verification_attempts": null, 820 | "verification_failures": [], 821 | "verification_attempts_count": 0, 822 | "slice_ids": null, 823 | "manual_verification": null, 824 | "manual_verification_requested": false 825 | }, 826 | "labels": { 827 | "accent": "american", 828 | "description": "childlish", 829 | "age": "young", 830 | "gender": "female", 831 | "use case": "animation" 832 | }, 833 | "description": null, 834 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/jBpfuIE2acCO8z3wKNLl/3a7e4339-78fa-404e-8d10-c3ef5587935b.mp3", 835 | "available_for_tiers": [], 836 | "settings": null, 837 | "sharing": null, 838 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 839 | }, 840 | { 841 | "voice_id": "jsCqWAovK2LkecY7zXl4", 842 | "name": "Freya", 843 | "samples": null, 844 | "category": "premade", 845 | "fine_tuning": { 846 | "language": null, 847 | "is_allowed_to_fine_tune": false, 848 | "fine_tuning_requested": false, 849 | "finetuning_state": "not_started", 850 | "verification_attempts": null, 851 | "verification_failures": [], 852 | "verification_attempts_count": 0, 853 | "slice_ids": null, 854 | "manual_verification": null, 855 | "manual_verification_requested": false 856 | }, 857 | "labels": { 858 | "accent": "american", 859 | "age": "young", 860 | "gender": "female", 861 | "description ": "overhyped", 862 | "usecase": "video games" 863 | }, 864 | "description": null, 865 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/jsCqWAovK2LkecY7zXl4/8e1f5240-556e-4fd5-892c-25df9ea3b593.mp3", 866 | "available_for_tiers": [], 867 | "settings": null, 868 | "sharing": null, 869 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 870 | }, 871 | { 872 | "voice_id": "oWAxZDx7w5VEj9dCyTzz", 873 | "name": "Grace", 874 | 
"samples": null, 875 | "category": "premade", 876 | "fine_tuning": { 877 | "language": null, 878 | "is_allowed_to_fine_tune": false, 879 | "fine_tuning_requested": false, 880 | "finetuning_state": "not_started", 881 | "verification_attempts": null, 882 | "verification_failures": [], 883 | "verification_attempts_count": 0, 884 | "slice_ids": null, 885 | "manual_verification": null, 886 | "manual_verification_requested": false 887 | }, 888 | "labels": { 889 | "accent": "american-southern", 890 | "age": "young", 891 | "gender": "female", 892 | "use case": "audiobook ", 893 | "description ": "gentle" 894 | }, 895 | "description": null, 896 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/oWAxZDx7w5VEj9dCyTzz/84a36d1c-e182-41a8-8c55-dbdd15cd6e72.mp3", 897 | "available_for_tiers": [], 898 | "settings": null, 899 | "sharing": null, 900 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 901 | }, 902 | { 903 | "voice_id": "onwK4e9ZLuTAKqWW03F9", 904 | "name": "Daniel", 905 | "samples": null, 906 | "category": "premade", 907 | "fine_tuning": { 908 | "language": null, 909 | "is_allowed_to_fine_tune": false, 910 | "fine_tuning_requested": false, 911 | "finetuning_state": "not_started", 912 | "verification_attempts": null, 913 | "verification_failures": [], 914 | "verification_attempts_count": 0, 915 | "slice_ids": null, 916 | "manual_verification": null, 917 | "manual_verification_requested": false 918 | }, 919 | "labels": { 920 | "accent": "british", 921 | "description": "deep", 922 | "age": "middle aged", 923 | "gender": "male", 924 | "use case": "news presenter" 925 | }, 926 | "description": null, 927 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/onwK4e9ZLuTAKqWW03F9/7eee0236-1a72-4b86-b303-5dcadc007ba9.mp3", 928 | "available_for_tiers": [], 929 | "settings": null, 930 | "sharing": null, 931 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 932 | }, 933 | { 934 | "voice_id": 
"pMsXgVXv3BLzUgSXRplE", 935 | "name": "Serena", 936 | "samples": null, 937 | "category": "premade", 938 | "fine_tuning": { 939 | "language": null, 940 | "is_allowed_to_fine_tune": false, 941 | "fine_tuning_requested": false, 942 | "finetuning_state": "not_started", 943 | "verification_attempts": null, 944 | "verification_failures": [], 945 | "verification_attempts_count": 0, 946 | "slice_ids": null, 947 | "manual_verification": null, 948 | "manual_verification_requested": false 949 | }, 950 | "labels": { 951 | "accent": "american", 952 | "description": "pleasant", 953 | "age": "middle aged", 954 | "gender": "female", 955 | "use case": "interactive" 956 | }, 957 | "description": null, 958 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pMsXgVXv3BLzUgSXRplE/d61f18ed-e5b0-4d0b-a33c-5c6e7e33b053.mp3", 959 | "available_for_tiers": [], 960 | "settings": null, 961 | "sharing": null, 962 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 963 | }, 964 | { 965 | "voice_id": "pNInz6obpgDQGcFmaJgB", 966 | "name": "Adam", 967 | "samples": null, 968 | "category": "premade", 969 | "fine_tuning": { 970 | "language": null, 971 | "is_allowed_to_fine_tune": false, 972 | "fine_tuning_requested": false, 973 | "finetuning_state": "not_started", 974 | "verification_attempts": null, 975 | "verification_failures": [], 976 | "verification_attempts_count": 0, 977 | "slice_ids": null, 978 | "manual_verification": null, 979 | "manual_verification_requested": false 980 | }, 981 | "labels": { 982 | "accent": "american", 983 | "description": "deep", 984 | "age": "middle aged", 985 | "gender": "male", 986 | "use case": "narration" 987 | }, 988 | "description": null, 989 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pNInz6obpgDQGcFmaJgB/38a69695-2ca9-4b9e-b9ec-f07ced494a58.mp3", 990 | "available_for_tiers": [], 991 | "settings": null, 992 | "sharing": null, 993 | "high_quality_base_model_ids": [] 994 | }, 995 | { 996 | 
"voice_id": "piTKgcLEGmPE4e6mEKli", 997 | "name": "Nicole", 998 | "samples": null, 999 | "category": "premade", 1000 | "fine_tuning": { 1001 | "language": null, 1002 | "is_allowed_to_fine_tune": false, 1003 | "fine_tuning_requested": false, 1004 | "finetuning_state": "not_started", 1005 | "verification_attempts": null, 1006 | "verification_failures": [], 1007 | "verification_attempts_count": 0, 1008 | "slice_ids": null, 1009 | "manual_verification": null, 1010 | "manual_verification_requested": false 1011 | }, 1012 | "labels": { 1013 | "accent": "american", 1014 | "description": "whisper", 1015 | "age": "young", 1016 | "gender": "female", 1017 | "use case": "audiobook" 1018 | }, 1019 | "description": null, 1020 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/piTKgcLEGmPE4e6mEKli/c269a54a-e2bc-44d0-bb46-4ed2666d6340.mp3", 1021 | "available_for_tiers": [], 1022 | "settings": null, 1023 | "sharing": null, 1024 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 1025 | }, 1026 | { 1027 | "voice_id": "t0jbNlBVZ17f02VDIeMI", 1028 | "name": "Jessie", 1029 | "samples": null, 1030 | "category": "premade", 1031 | "fine_tuning": { 1032 | "language": null, 1033 | "is_allowed_to_fine_tune": false, 1034 | "fine_tuning_requested": false, 1035 | "finetuning_state": "not_started", 1036 | "verification_attempts": null, 1037 | "verification_failures": [], 1038 | "verification_attempts_count": 0, 1039 | "slice_ids": null, 1040 | "manual_verification": null, 1041 | "manual_verification_requested": false 1042 | }, 1043 | "labels": { 1044 | "accent": "american", 1045 | "description": "raspy ", 1046 | "age": "old", 1047 | "gender": "male", 1048 | "use case": "video games" 1049 | }, 1050 | "description": null, 1051 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/t0jbNlBVZ17f02VDIeMI/e26939e3-61a4-4872-a41d-33922cfbdcdc.mp3", 1052 | "available_for_tiers": [], 1053 | "settings": null, 1054 | "sharing": null, 1055 | 
"high_quality_base_model_ids": ["eleven_multilingual_v1"] 1056 | }, 1057 | { 1058 | "voice_id": "wViXBPUzp2ZZixB1xQuM", 1059 | "name": "Ryan", 1060 | "samples": null, 1061 | "category": "premade", 1062 | "fine_tuning": { 1063 | "language": null, 1064 | "is_allowed_to_fine_tune": false, 1065 | "fine_tuning_requested": false, 1066 | "finetuning_state": "not_started", 1067 | "verification_attempts": null, 1068 | "verification_failures": [], 1069 | "verification_attempts_count": 0, 1070 | "slice_ids": null, 1071 | "manual_verification": null, 1072 | "manual_verification_requested": false 1073 | }, 1074 | "labels": { 1075 | "age": "middle aged", 1076 | "description": "soldier", 1077 | "accent": "american", 1078 | "gender": "male", 1079 | "use case": "audiobook" 1080 | }, 1081 | "description": null, 1082 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/wViXBPUzp2ZZixB1xQuM/4a82f749-889c-4097-85f0-a3826a28b1d8.mp3", 1083 | "available_for_tiers": [], 1084 | "settings": null, 1085 | "sharing": null, 1086 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 1087 | }, 1088 | { 1089 | "voice_id": "yoZ06aMxZJJ28mfd3POQ", 1090 | "name": "Sam", 1091 | "samples": null, 1092 | "category": "premade", 1093 | "fine_tuning": { 1094 | "language": null, 1095 | "is_allowed_to_fine_tune": false, 1096 | "fine_tuning_requested": false, 1097 | "finetuning_state": "not_started", 1098 | "verification_attempts": null, 1099 | "verification_failures": [], 1100 | "verification_attempts_count": 0, 1101 | "slice_ids": null, 1102 | "manual_verification": null, 1103 | "manual_verification_requested": false 1104 | }, 1105 | "labels": { 1106 | "accent": "american", 1107 | "description": "raspy", 1108 | "age": "young", 1109 | "gender": "male", 1110 | "use case": "narration" 1111 | }, 1112 | "description": null, 1113 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/yoZ06aMxZJJ28mfd3POQ/ac9d1c91-92ce-4b20-8cc2-3187a7da49ec.mp3", 1114 
| "available_for_tiers": [], 1115 | "settings": null, 1116 | "sharing": null, 1117 | "high_quality_base_model_ids": [] 1118 | }, 1119 | { 1120 | "voice_id": "z9fAnlkpzviPz146aGWa", 1121 | "name": "Glinda", 1122 | "samples": null, 1123 | "category": "premade", 1124 | "fine_tuning": { 1125 | "language": null, 1126 | "is_allowed_to_fine_tune": false, 1127 | "fine_tuning_requested": false, 1128 | "finetuning_state": "not_started", 1129 | "verification_attempts": null, 1130 | "verification_failures": [], 1131 | "verification_attempts_count": 0, 1132 | "slice_ids": null, 1133 | "manual_verification": null, 1134 | "manual_verification_requested": false 1135 | }, 1136 | "labels": { 1137 | "accent": "american", 1138 | "description": "witch", 1139 | "age": "middle aged", 1140 | "gender": "female", 1141 | "use case": "video games" 1142 | }, 1143 | "description": null, 1144 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/z9fAnlkpzviPz146aGWa/cbc60443-7b61-4ebb-b8e1-5c03237ea01d.mp3", 1145 | "available_for_tiers": [], 1146 | "settings": null, 1147 | "sharing": null, 1148 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 1149 | }, 1150 | { 1151 | "voice_id": "zcAOhNBS3c14rBihAFp1", 1152 | "name": "Giovanni", 1153 | "samples": null, 1154 | "category": "premade", 1155 | "fine_tuning": { 1156 | "language": null, 1157 | "is_allowed_to_fine_tune": false, 1158 | "fine_tuning_requested": false, 1159 | "finetuning_state": "not_started", 1160 | "verification_attempts": null, 1161 | "verification_failures": [], 1162 | "verification_attempts_count": 0, 1163 | "slice_ids": null, 1164 | "manual_verification": null, 1165 | "manual_verification_requested": false 1166 | }, 1167 | "labels": { 1168 | "accent": "english-italian", 1169 | "description": "foreigner", 1170 | "age": "young", 1171 | "gender": "male", 1172 | "use case": "audiobook" 1173 | }, 1174 | "description": null, 1175 | "preview_url": 
"https://storage.googleapis.com/eleven-public-prod/premade/voices/zcAOhNBS3c14rBihAFp1/e7410f8f-4913-4cb8-8907-784abee5aff8.mp3", 1176 | "available_for_tiers": [], 1177 | "settings": null, 1178 | "sharing": null, 1179 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 1180 | }, 1181 | { 1182 | "voice_id": "zrHiDhphv9ZnVXBqCLjz", 1183 | "name": "Mimi", 1184 | "samples": null, 1185 | "category": "premade", 1186 | "fine_tuning": { 1187 | "language": null, 1188 | "is_allowed_to_fine_tune": false, 1189 | "fine_tuning_requested": false, 1190 | "finetuning_state": "not_started", 1191 | "verification_attempts": null, 1192 | "verification_failures": [], 1193 | "verification_attempts_count": 0, 1194 | "slice_ids": null, 1195 | "manual_verification": null, 1196 | "manual_verification_requested": false 1197 | }, 1198 | "labels": { 1199 | "accent": "english-swedish", 1200 | "description": "childish", 1201 | "age": "young", 1202 | "gender": "female", 1203 | "use case": "animation" 1204 | }, 1205 | "description": null, 1206 | "preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/zrHiDhphv9ZnVXBqCLjz/decbf20b-0f57-4fac-985b-a4f0290ebfc4.mp3", 1207 | "available_for_tiers": [], 1208 | "settings": null, 1209 | "sharing": null, 1210 | "high_quality_base_model_ids": ["eleven_multilingual_v1"] 1211 | } 1212 | ] 1213 | } 1214 | -------------------------------------------------------------------------------- /data/voices.lmnt.json: -------------------------------------------------------------------------------- 1 | { 2 | "voices": { 3 | "3058b1b3-6a9e-4e59-8332-3899f846a2d6": { 4 | "id": "3058b1b3-6a9e-4e59-8332-3899f846a2d6", 5 | "name": "Beatriz", 6 | "gender": "F", 7 | "tags": ["portuguese-accent"], 8 | "imageUrl": "https://api.lmnt.com/img/voice/beatriz.webp", 9 | "state": "ready", 10 | "description": "Beatriz's voice possesses a distinctive Portuguese accent, infusing her speech with the warm, rhythmic cadence of her native language. 
As a middle-aged woman, her voice carries a depth and maturity that adds an air of wisdom and experience to her conversations." 11 | }, 12 | "curtis": { 13 | "id": "curtis", 14 | "name": "Curtis", 15 | "gender": "M", 16 | "tags": ["middle-aged"], 17 | "imageUrl": "https://api.lmnt.com/img/voice/curtis.webp", 18 | "state": "ready", 19 | "description": "Curtis' voice carries the seasoned timbre of middle age, filled with warmth, curiosity, and a hint of wisdom gained over the years. His enthusiastic tone reflects his keen interest in the world, often rising and falling with the ebb and flow of exciting new discoveries." 20 | }, 21 | "dalton": { 22 | "id": "dalton", 23 | "name": "Dalton", 24 | "gender": "M", 25 | "tags": ["southern-drawl"], 26 | "imageUrl": "https://api.lmnt.com/img/voice/dalton.webp", 27 | "state": "ready", 28 | "description": "Dalton's voice is marked by a distinctive southern-drawl, stretching out his vowels in a slow, languid manner that evokes images of lazy afternoons in the Deep South. His speech is charming and soothing, filled with a warmth that is characteristic of his southern roots." 29 | }, 30 | "7a9c1584-66a7-4667-b055-dc551d9556c2": { 31 | "id": "7a9c1584-66a7-4667-b055-dc551d9556c2", 32 | "name": "Donna", 33 | "gender": "F", 34 | "tags": ["middle-aged"], 35 | "imageUrl": "https://api.lmnt.com/img/voice/donna.webp", 36 | "state": "ready", 37 | "description": "Donna's middle-aged, deliberate voice carries an authoritative warmth that captures attention but never overpowers. Her carefully-chosen words are spoken with a slow, controlled cadence, each syllable rich with experience and the wisdom of the years." 
38 | }, 39 | "4e584062-ad8d-4cde-a9df-8f9743046a3d": { 40 | "id": "4e584062-ad8d-4cde-a9df-8f9743046a3d", 41 | "name": "Eleanor", 42 | "gender": "F", 43 | "tags": ["old-timey"], 44 | "imageUrl": "https://api.lmnt.com/img/voice/eleanor.webp", 45 | "state": "ready", 46 | "description": "Eleanor's voice is rich with the bygone days, brimming with a gentle and fetching old-world charm. Her occasional use of antiquated lingo, coupled with a soft, melodious cadence, is reminiscent of a vintage radio broadcast, inspiring nostalgia for a simpler time." 47 | }, 48 | "471c04bc-0ee7-44c4-8d7d-15ee0db9d429": { 49 | "id": "471c04bc-0ee7-44c4-8d7d-15ee0db9d429", 50 | "name": "Ethel", 51 | "gender": "F", 52 | "tags": ["older"], 53 | "imageUrl": "https://api.lmnt.com/img/voice/ethel.webp", 54 | "state": "ready", 55 | "description": "Ethel's voice carries the seasoned rasp of many years, flavored with a touch of grumpiness that often points to youngsters overstepping their boundaries. Her voice, like a cane held aloft in reprimand, has a predictable cantankerousness, especially when declaring the sanctity of her meticulously manicured lawn." 56 | }, 57 | "abb4aea5-bc72-467d-82bb-c3169a528cde": { 58 | "id": "abb4aea5-bc72-467d-82bb-c3169a528cde", 59 | "name": "Giuseppe", 60 | "gender": "M", 61 | "tags": ["italian-accent"], 62 | "imageUrl": "https://api.lmnt.com/img/voice/giuseppe.webp", 63 | "state": "ready", 64 | "description": "Giuseppe's voice is soaked with a distinct and enticing Italian-accent, dropping vowels and elongating syllables in a melodious, sing-song manner. His voice carries the warmth of his homeland, weaving together a vibrant tapestry of enthusiastic pitches and expressive intonations." 
65 | }, 66 | "034b632b-df71-46c8-b440-86a42ffc3cf3": { 67 | "id": "034b632b-df71-46c8-b440-86a42ffc3cf3", 68 | "name": "Henry", 69 | "gender": "M", 70 | "tags": ["old-timey-radio"], 71 | "imageUrl": "https://api.lmnt.com/img/voice/henry.webp", 72 | "state": "ready", 73 | "description": "Henry's voice harkens back to the golden age of radio, with a grainy warmth reminiscent of stations of decades past. His rich, baritone tone is punctuated by the charmingly antiquated inflections and expressions so familiar to the old-timey radio era." 74 | }, 75 | "c8ea4f2a-06e6-4d7b-9484-db941bf7c657": { 76 | "id": "c8ea4f2a-06e6-4d7b-9484-db941bf7c657", 77 | "name": "Joe", 78 | "gender": "M", 79 | "tags": ["middle-aged"], 80 | "imageUrl": "https://api.lmnt.com/img/voice/joe.webp", 81 | "state": "ready", 82 | "description": "Joe's voice, warm and full-bodied with age, resonates with an inviting timbre that instills a sense of comfort and trust. It carries the kind of wisdom you would expect from a dedicated teacher or a dependable lifelong friend, always steadied with a touch of gentle authority." 83 | }, 84 | "9db02220-4029-40f1-a807-55d645386d2b": { 85 | "id": "9db02220-4029-40f1-a807-55d645386d2b", 86 | "name": "Kathrine", 87 | "gender": "F", 88 | "tags": ["middle-aged"], 89 | "imageUrl": "https://api.lmnt.com/img/voice/kathrine.webp", 90 | "state": "ready", 91 | "description": "Kathrine's voice carries an endearing mix of age-acquired wisdom and youthful vigor, enriched by the warm undertones of a middle-aged woman's timbre. Her intellectually-driven queries and insights reveal a certain sparkle of curiosity, resonating in her carefully modulated and intelligent tone." 
92 | }, 93 | "mrnmrz72": { 94 | "id": "mrnmrz72", 95 | "name": "Marzia", 96 | "gender": "F", 97 | "imageUrl": "https://api.lmnt.com/img/voice/marzia.webp", 98 | "state": "ready", 99 | "tags": ["italian-accent"], 100 | "description": "Marzia's voice is delicately accented with the soft, melodic lilt of her Italian origin, imbuing her speech with an enchanting rhythm. Her tones come out warm and expressive, evoking her Mediterranean heritage and lending her words an added touch of charm and vivacity." 101 | }, 102 | "maurice": { 103 | "id": "maurice", 104 | "name": "Maurice", 105 | "gender": "M", 106 | "tags": ["older"], 107 | "imageUrl": "https://api.lmnt.com/img/voice/maurice.webp", 108 | "state": "ready", 109 | "description": "Maurice's voice is audibly aging, filled with richness that only comes with years of experience, reminiscent of a grandpa who has many tales to tell. There's a comforting timbre to it, warm and slightly gravelly, making you feel like you're wrapped in a loving embrace every time he speaks." 110 | }, 111 | "e8d31f4e-e8af-4c72-9fc9-f4f6f535c6cc": { 112 | "id": "e8d31f4e-e8af-4c72-9fc9-f4f6f535c6cc", 113 | "name": "Natalie", 114 | "gender": "F", 115 | "tags": ["middle-aged"], 116 | "imageUrl": "https://api.lmnt.com/img/voice/natalie.webp", 117 | "state": "ready", 118 | "description": "Natalie's voice, exhibiting the maturity of her middle age, lacks variation in pitch and rhythm; it sounds as though she's simply reciting words from a page. Despite her steady, somewhat monotonous tone, a subtle depth of knowledge and experience threads through every word she articulates." 
119 | }, 120 | "4e95c4a7-95aa-4b1d-bc23-00f7d1d484ea": { 121 | "id": "4e95c4a7-95aa-4b1d-bc23-00f7d1d484ea", 122 | "name": "Oliver", 123 | "gender": "M", 124 | "tags": ["middle-aged"], 125 | "imageUrl": "https://api.lmnt.com/img/voice/oliver.webp", 126 | "state": "ready", 127 | "description": "Oliver's voice holds an engaging lilt, a middle-aged tone doused with the rhythmic timbre you'd expect from a seasoned carnival worker. His words rise and fall like a festive carousel ride, instantly drawing you into his lively, slightly eccentric world." 128 | }, 129 | "olivia": { 130 | "id": "olivia", 131 | "name": "Olivia", 132 | "gender": "F", 133 | "tags": ["british-accent"], 134 | "imageUrl": "https://api.lmnt.com/img/voice/olivia.webp", 135 | "state": "ready", 136 | "description": "Olivia's voice carries a distinct and captivating British accent, elegantly delivering crisp, clear English with a mix of sophistication and charm. Her tone is smooth and soothing, yet authoritative, displaying an enchanting blend of warmth and confidence." 137 | }, 138 | "598edd09-4fb2-460c-9ad1-d9f8081079d3": { 139 | "id": "598edd09-4fb2-460c-9ad1-d9f8081079d3", 140 | "name": "Priya", 141 | "gender": "F", 142 | "tags": ["indian-accent"], 143 | "imageUrl": "https://api.lmnt.com/img/voice/priya.webp", 144 | "state": "ready", 145 | "description": "Priya's voice carries the rich, melodious tones of her Indian heritage, often marked by a distinct, rhythmic accent that adds a profound depth to her communication. Her speech is warm and expressive, radiating the vibrant culture of her Indian roots." 146 | }, 147 | "rbalogh": { 148 | "id": "rbalogh", 149 | "name": "Reba", 150 | "gender": "F", 151 | "imageUrl": "https://api.lmnt.com/img/voice/rbalogh.webp", 152 | "state": "ready", 153 | "tags": ["hungarian-accent"], 154 | "description": "Reba's voice possesses a lush, melodious quality, laced with a distinctive Hungarian accent that adds to her unique charm. 
The nuances of her speech carry inflections and rhythms that reflect her Eastern European heritage, making her voice instantly recognizable and engaging." 155 | }, 156 | "shanti": { 157 | "id": "shanti", 158 | "name": "Shanti", 159 | "gender": "F", 160 | "tags": ["indian-accent", "older"], 161 | "imageUrl": "https://api.lmnt.com/img/voice/shanti.webp", 162 | "state": "ready", 163 | "description": "Shanti's voice carries the melodic lilt of an Indian accent, seasoned with age and tinged slightly nasal. Her speech, durable with time, draws a picture of wisdom and character." 164 | }, 165 | "a904f97c-ff5f-4099-98c8-b5effe00c9a6": { 166 | "id": "a904f97c-ff5f-4099-98c8-b5effe00c9a6", 167 | "name": "Szymon", 168 | "gender": "M", 169 | "tags": ["polish-accent"], 170 | "imageUrl": "https://api.lmnt.com/img/voice/szymon.webp", 171 | "state": "ready", 172 | "description": "Szymon's voice is layered with a distinctly Polish accent, making his consonants sharp and vowels warm, embellishing his speech in a captivating, melodic rhythm. The soft tones reflect the Slavic origin, adding a unique charm and depth to his narration and conversations." 
173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /example/example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lgrammel/storyteller/0865ef487b8100848fcab25af80e60bffb4f42da/example/example-1.png -------------------------------------------------------------------------------- /example/example-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lgrammel/storyteller/0865ef487b8100848fcab25af80e60bffb4f42da/example/example-2.png -------------------------------------------------------------------------------- /example/example-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lgrammel/storyteller/0865ef487b8100848fcab25af80e60bffb4f42da/example/example-3.png -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const webpack = require("webpack"); 3 | 4 | module.exports = { 5 | output: "export", 6 | 7 | webpack: (config, { isServer }) => { 8 | if (isServer) { 9 | return config; 10 | } 11 | 12 | config.resolve = config.resolve ?? {}; 13 | config.resolve.fallback = config.resolve.fallback ?? 
{}; 14 | 15 | // async hooks is not available in the browser: 16 | config.resolve.fallback.async_hooks = false; 17 | 18 | return config; 19 | }, 20 | }; 21 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "storyteller", 3 | "description": "StoryTeller: generate short audio stories for pre-school kids.", 4 | "author": "Lars Grammel", 5 | "license": "MIT", 6 | "keywords": [ 7 | "modelfusion", 8 | "multimodal", 9 | "openai", 10 | "whisper", 11 | "ai" 12 | ], 13 | "version": "0.1.0", 14 | "private": true, 15 | "scripts": { 16 | "next": "next dev", 17 | "fastify": "npx tsx src/storyteller/server.ts", 18 | "build": "next build", 19 | "lint": "next lint" 20 | }, 21 | "engines": { 22 | "node": ">=18" 23 | }, 24 | "dependencies": { 25 | "@fastify/cors": "^8.4.0", 26 | "@fastify/static": "^6.11.2", 27 | "@radix-ui/react-aspect-ratio": "^1.0.3", 28 | "@radix-ui/react-slot": "^1.0.2", 29 | "@types/node": "20.6.0", 30 | "@types/react": "18.2.21", 31 | "@types/react-dom": "18.2.7", 32 | "autoprefixer": "10.4.15", 33 | "bufferutil": "^4.0.8", 34 | "class-variance-authority": "^0.7.0", 35 | "clsx": "^2.0.0", 36 | "eslint": "8.49.0", 37 | "eslint-config-next": "13.4.19", 38 | "eventsource-parser": "1.1.1", 39 | "fastify": "^4.24.3", 40 | "lucide-react": "^0.276.0", 41 | "modelfusion": "0.131.0", 42 | "modelfusion-experimental": "0.6.0", 43 | "next": "13.5.6", 44 | "postcss": "8.4.31", 45 | "react": "18.2.0", 46 | "react-dom": "18.2.0", 47 | "tailwind-merge": "^1.14.0", 48 | "tailwindcss": "3.3.3", 49 | "tailwindcss-animate": "^1.0.7", 50 | "typescript": "5.2.2", 51 | "utf-8-validate": "^6.0.3", 52 | "zod": "3.22.4", 53 | "zod-to-json-schema": "3.21.4" 54 | }, 55 | "devDependencies": { 56 | "dotenv": "16.0.3", 57 | "tsx": "^3.12.8" 58 | } 59 | } 60 | -------------------------------------------------------------------------------- 
/pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import "./globals.css"; 2 | import type { AppProps } from "next/app"; 3 | 4 | export default function ExampleApp({ 5 | Component, 6 | pageProps, 7 | }: AppProps): JSX.Element { 8 | return ; 9 | } 10 | -------------------------------------------------------------------------------- /pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from "next/document"; 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /pages/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | @layer base { 6 | :root { 7 | --background: 0 0% 100%; 8 | --foreground: 222.2 84% 4.9%; 9 | 10 | --card: 0 0% 100%; 11 | --card-foreground: 222.2 84% 4.9%; 12 | 13 | --popover: 0 0% 100%; 14 | --popover-foreground: 222.2 84% 4.9%; 15 | 16 | --primary: 222.2 47.4% 11.2%; 17 | --primary-foreground: 210 40% 98%; 18 | 19 | --secondary: 210 40% 96.1%; 20 | --secondary-foreground: 222.2 47.4% 11.2%; 21 | 22 | --muted: 210 40% 96.1%; 23 | --muted-foreground: 215.4 16.3% 46.9%; 24 | 25 | --accent: 210 40% 96.1%; 26 | --accent-foreground: 222.2 47.4% 11.2%; 27 | 28 | --destructive: 0 84.2% 60.2%; 29 | --destructive-foreground: 210 40% 98%; 30 | 31 | --border: 214.3 31.8% 91.4%; 32 | --input: 214.3 31.8% 91.4%; 33 | --ring: 222.2 84% 4.9%; 34 | 35 | --radius: 0.5rem; 36 | } 37 | 38 | .dark { 39 | --background: 222.2 84% 4.9%; 40 | --foreground: 210 40% 98%; 41 | 42 | --card: 222.2 84% 4.9%; 43 | --card-foreground: 210 40% 98%; 44 | 45 | --popover: 222.2 84% 4.9%; 46 | --popover-foreground: 210 40% 98%; 47 | 48 | --primary: 210 40% 98%; 49 | --primary-foreground: 222.2 47.4% 11.2%; 50 | 51 | --secondary: 217.2 32.6% 17.5%; 52 | --secondary-foreground: 210 40% 98%; 53 | 54 | --muted: 217.2 32.6% 17.5%; 55 | --muted-foreground: 215 20.2% 65.1%; 56 | 57 | --accent: 217.2 32.6% 17.5%; 58 | --accent-foreground: 210 40% 98%; 59 | 60 | --destructive: 0 62.8% 30.6%; 61 | --destructive-foreground: 210 40% 98%; 62 | 63 | --border: 217.2 32.6% 17.5%; 64 | --input: 217.2 32.6% 17.5%; 65 | --ring: 212.7 26.8% 83.9%; 66 | } 67 | } 68 | 69 | @layer base { 70 | * { 71 | @apply border-border; 72 | } 73 | body { 74 | @apply bg-background text-foreground; 75 | } 76 | } 77 | 
-------------------------------------------------------------------------------- /pages/index.tsx: -------------------------------------------------------------------------------- 1 | import { AspectRatio } from "@/components/ui/aspect-ratio"; 2 | import { Button } from "@/components/ui/button"; 3 | import { 4 | Card, 5 | CardContent, 6 | CardDescription, 7 | CardFooter, 8 | CardHeader, 9 | CardTitle, 10 | } from "@/components/ui/card"; 11 | import { Skeleton } from "@/components/ui/skeleton"; 12 | import { storytellerSchema } from "@/storyteller/storytellerSchema"; 13 | import { Loader2, Mic } from "lucide-react"; 14 | import { delay } from "modelfusion"; 15 | import { 16 | convertAudioChunksToBase64, 17 | invokeFlow, 18 | } from "modelfusion-experimental/browser"; 19 | import { useRef, useState } from "react"; 20 | 21 | const baseUrl = process.env.NEXT_PUBLIC_BASE_URL; 22 | 23 | export default function Home() { 24 | const mediaRecorderRef = useRef(null); 25 | const audioChunksRef = useRef([]); 26 | const [isRecording, setIsRecording] = useState(false); 27 | const [waitingForUserInput, setWaitingForUserInput] = useState(true); 28 | const [imageUrl, setImageUrl] = useState(null); 29 | const [title, setTitle] = useState(null); 30 | const [input, setInput] = useState(null); 31 | const [audioUrls, setAudioUrls] = useState([]); 32 | const [activePart, setActivePart] = useState(0); 33 | const [generatingStory, setGeneratingStory] = useState(false); 34 | const [shouldAutoPlay, setShouldAutoPlay] = useState(false); 35 | const [error, setError] = useState(null); 36 | 37 | const resetError = () => { 38 | setError(null); 39 | }; 40 | 41 | const startRecording = () => { 42 | if (isRecording) return; 43 | 44 | resetError(); // Clear any previous errors 45 | 46 | navigator.mediaDevices 47 | .getUserMedia({ audio: true }) 48 | .then((stream) => { 49 | const mediaRecorder = new MediaRecorder(stream); 50 | mediaRecorderRef.current = mediaRecorder; 51 | 52 | 
mediaRecorder.ondataavailable = (e) => { 53 | audioChunksRef.current.push(e.data); 54 | }; 55 | 56 | // .start(1000): workaround for Safari/iphone 57 | // see https://community.openai.com/t/whisper-api-completely-wrong-for-mp4/289256/12 58 | mediaRecorder.start(1000); 59 | 60 | setIsRecording(true); 61 | }) 62 | .catch((error) => { 63 | setError( 64 | "Error accessing microphone. Please ensure you have given the necessary permissions." 65 | ); 66 | }); 67 | }; 68 | 69 | const stopRecording = () => { 70 | const mediaRecorder = mediaRecorderRef.current; 71 | 72 | if (mediaRecorder && isRecording) { 73 | mediaRecorder.onstop = async () => { 74 | setWaitingForUserInput(false); 75 | setGeneratingStory(true); 76 | setShouldAutoPlay(true); 77 | 78 | try { 79 | const mimeType = mediaRecorder.mimeType; 80 | const audioChunks = audioChunksRef.current; 81 | 82 | audioChunksRef.current = []; 83 | mediaRecorder.stream?.getTracks().forEach((track) => track.stop()); // stop microphone access 84 | 85 | invokeFlow({ 86 | url: `${baseUrl}/generate-story`, 87 | schema: storytellerSchema, 88 | input: { 89 | audioData: await convertAudioChunksToBase64({ 90 | audioChunks, 91 | mimeType, 92 | }), 93 | mimeType, 94 | }, 95 | onEvent(event) { 96 | switch (event.type) { 97 | case "transcribed-input": { 98 | setInput(event.input); 99 | break; 100 | } 101 | case "generated-image": { 102 | setImageUrl(event.url); 103 | break; 104 | } 105 | case "generated-title": { 106 | setTitle(event.title); 107 | break; 108 | } 109 | case "generated-audio-part": { 110 | audioUrls[event.index] = event.url; 111 | setAudioUrls(audioUrls.slice()); 112 | break; 113 | } 114 | } 115 | }, 116 | onStop() { 117 | setGeneratingStory(false); 118 | }, 119 | }); 120 | } catch (error) { 121 | console.error("Error generating story:", error); 122 | setError("An error occurred while generating the story:" + error); 123 | } 124 | }; 125 | 126 | mediaRecorder.stop(); 127 | setIsRecording(false); 128 | } 129 | }; 130 | 131 | 
const onPlaybackEnded = async () => { 132 | if (activePart === audioUrls.length - 1) { 133 | setActivePart(0); 134 | setShouldAutoPlay(false); 135 | } else { 136 | await delay(1000); // delay between parts to improve the quality of the story 137 | setActivePart(activePart + 1); 138 | } 139 | }; 140 | 141 | return ( 142 |
143 | {error && ( 144 |
148 | Error 149 | {error} 150 | 154 | 160 | Close 161 | 162 | 163 | 164 |
165 | )} 166 | 167 | {waitingForUserInput ? ( 168 | 169 | 170 | Story Teller 171 | 172 | Automatically generate stories for pre-school kids. 173 | 174 | 175 | 176 | 193 | 194 | 195 | ) : ( 196 | 197 | 198 | {input ? ( 199 | "{input}" 200 | ) : ( 201 | 202 | )} 203 | 204 | {title ?? } 205 | 206 | 207 | 208 | {imageUrl != null ? ( 209 |
210 | 211 | {title 216 | 217 |
218 | ) : ( 219 | 220 | )} 221 |
222 | 223 | {audioUrls[activePart] != null ? ( 224 | <> 225 | 241 |
242 | )} 243 |
244 | ); 245 | } 246 | -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /src/components/ui/aspect-ratio.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as AspectRatioPrimitive from "@radix-ui/react-aspect-ratio"; 4 | 5 | const AspectRatio = AspectRatioPrimitive.Root; 6 | 7 | export { AspectRatio }; 8 | -------------------------------------------------------------------------------- /src/components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import { Slot } from "@radix-ui/react-slot" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | const buttonVariants = cva( 8 | "inline-flex items-center justify-center rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50", 9 | { 10 | variants: { 11 | variant: { 12 | default: "bg-primary text-primary-foreground hover:bg-primary/90", 13 | destructive: 14 | "bg-destructive text-destructive-foreground hover:bg-destructive/90", 15 | outline: 16 | "border border-input bg-background hover:bg-accent hover:text-accent-foreground", 17 | secondary: 18 | "bg-secondary text-secondary-foreground hover:bg-secondary/80", 19 | ghost: "hover:bg-accent hover:text-accent-foreground", 20 | link: "text-primary underline-offset-4 hover:underline", 21 | }, 22 | size: { 23 | default: "h-10 px-4 py-2", 24 | sm: "h-9 rounded-md px-3", 25 | lg: "h-11 
rounded-md px-8", 26 | icon: "h-10 w-10", 27 | }, 28 | }, 29 | defaultVariants: { 30 | variant: "default", 31 | size: "default", 32 | }, 33 | } 34 | ) 35 | 36 | export interface ButtonProps 37 | extends React.ButtonHTMLAttributes, 38 | VariantProps { 39 | asChild?: boolean 40 | } 41 | 42 | const Button = React.forwardRef( 43 | ({ className, variant, size, asChild = false, ...props }, ref) => { 44 | const Comp = asChild ? Slot : "button" 45 | return ( 46 | 51 | ) 52 | } 53 | ) 54 | Button.displayName = "Button" 55 | 56 | export { Button, buttonVariants } 57 | -------------------------------------------------------------------------------- /src/components/ui/card.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | const Card = React.forwardRef< 6 | HTMLDivElement, 7 | React.HTMLAttributes 8 | >(({ className, ...props }, ref) => ( 9 |
17 | )) 18 | Card.displayName = "Card" 19 | 20 | const CardHeader = React.forwardRef< 21 | HTMLDivElement, 22 | React.HTMLAttributes 23 | >(({ className, ...props }, ref) => ( 24 |
29 | )) 30 | CardHeader.displayName = "CardHeader" 31 | 32 | const CardTitle = React.forwardRef< 33 | HTMLParagraphElement, 34 | React.HTMLAttributes 35 | >(({ className, ...props }, ref) => ( 36 |

44 | )) 45 | CardTitle.displayName = "CardTitle" 46 | 47 | const CardDescription = React.forwardRef< 48 | HTMLParagraphElement, 49 | React.HTMLAttributes 50 | >(({ className, ...props }, ref) => ( 51 |

56 | )) 57 | CardDescription.displayName = "CardDescription" 58 | 59 | const CardContent = React.forwardRef< 60 | HTMLDivElement, 61 | React.HTMLAttributes 62 | >(({ className, ...props }, ref) => ( 63 |

64 | )) 65 | CardContent.displayName = "CardContent" 66 | 67 | const CardFooter = React.forwardRef< 68 | HTMLDivElement, 69 | React.HTMLAttributes 70 | >(({ className, ...props }, ref) => ( 71 |
76 | )) 77 | CardFooter.displayName = "CardFooter" 78 | 79 | export { Card, CardHeader, CardFooter, CardTitle, CardDescription, CardContent } 80 | -------------------------------------------------------------------------------- /src/components/ui/skeleton.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | 3 | function Skeleton({ 4 | className, 5 | ...props 6 | }: React.HTMLAttributes) { 7 | return ( 8 |
12 | ) 13 | } 14 | 15 | export { Skeleton } 16 | -------------------------------------------------------------------------------- /src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { type ClassValue, clsx } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /src/storyteller/VoiceManager.ts: -------------------------------------------------------------------------------- 1 | import { 2 | MemoryVectorIndex, 3 | SpeechGenerationModel, 4 | VectorIndexRetriever, 5 | ZodSchema, 6 | elevenlabs, 7 | generateObject, 8 | lmnt, 9 | openai, 10 | retrieve, 11 | zodSchema, 12 | } from "modelfusion"; 13 | import { readFile } from "node:fs/promises"; 14 | import { z } from "zod"; 15 | 16 | const voiceSchema = z.object({ 17 | provider: z.enum(["lmnt", "elevenlabs"]), 18 | voiceId: z.string(), 19 | name: z.string(), 20 | gender: z.enum(["M", "F"]), 21 | description: z.string(), 22 | }); 23 | 24 | export type Voice = z.infer; 25 | 26 | export class VoiceManager { 27 | private readonly voiceIndex: MemoryVectorIndex; 28 | private readonly narrator: Voice; 29 | private readonly speakerToVoice = new Map(); 30 | 31 | static async fromFile({ 32 | voicesPath, 33 | narrator, 34 | }: { 35 | voicesPath: string; 36 | narrator: Voice; 37 | }): Promise { 38 | const voicesData = await readFile(voicesPath, "utf8"); 39 | 40 | const voiceIndex = await MemoryVectorIndex.deserialize({ 41 | serializedData: voicesData, 42 | schema: new ZodSchema(voiceSchema), 43 | }); 44 | 45 | return new VoiceManager({ voiceIndex, narrator }); 46 | } 47 | 48 | constructor({ 49 | voiceIndex, 50 | narrator, 51 | }: { 52 | voiceIndex: MemoryVectorIndex; 53 | narrator: Voice; 54 | }) { 55 | this.voiceIndex = voiceIndex; 56 | this.narrator = narrator; 57 | } 58 | 59 | async 
getSpeechModel({ 60 | speaker, 61 | story, 62 | }: { 63 | speaker: string; 64 | story: string; 65 | }): Promise { 66 | let voice = this.speakerToVoice.get(speaker); 67 | 68 | if (voice == null) { 69 | voice = await this.selectVoice({ speaker, story }); 70 | this.speakerToVoice.set(speaker, voice); 71 | } 72 | 73 | switch (voice.provider) { 74 | case "lmnt": 75 | return lmnt.SpeechGenerator({ voice: voice.voiceId }); 76 | case "elevenlabs": 77 | return elevenlabs.SpeechGenerator({ voice: voice.voiceId }); 78 | default: 79 | throw new Error(`Unknown voice provider: ${voice.provider}`); 80 | } 81 | } 82 | /* Chooses the voice for a speaker: the fixed narrator voice when the speaker is "narrator" (case-insensitive); otherwise generates a gender and voice description for the speaker and looks up a matching voice in the index that no other speaker uses yet. Throws when no such voice is found. */ 83 | private async selectVoice({ 84 | speaker, 85 | story, 86 | }: { 87 | speaker: string; 88 | story: string; 89 | }): Promise { 90 | // pre-determined narrator voice: 91 | if (speaker.toLowerCase() === "narrator") { 92 | return this.narrator; 93 | } 94 | 95 | // generate voice descriptions for the speakers: 96 | const voiceDescription = await generateObject({ 97 | functionId: "generate-voice-description", 98 | model: openai 99 | .ChatTextGenerator({ model: "gpt-3.5-turbo", temperature: 0 }) 100 | .asFunctionCallObjectGenerationModel({ fnName: "voice" }) 101 | .withTextPrompt(), 102 | schema: zodSchema( 103 | z.object({ 104 | gender: z.string().describe("M for male, F for female"), 105 | description: z.string().describe("Voice description"), 106 | }) 107 | ), 108 | prompt: [ 109 | `## Task`, 110 | `Generate a voice description for ${speaker} from the following story for an audio book.`, 111 | "The voice should be appropriate for a preschooler listener.", 112 | "Include the gender and age in the voice description.", 113 | "", 114 | "## Story", 115 | story, 116 | "", 117 | "## Speaker", 118 | speaker, 119 | "", 120 | "## Voice description (incl.
age, gender)", 121 | ].join("\n"), 122 | }); 123 | 124 | // retrieve the voice vectors from the index: 125 | const potentialVoices = await retrieve( 126 | new VectorIndexRetriever({ 127 | vectorIndex: this.voiceIndex, 128 | embeddingModel: openai.TextEmbedder({ 129 | model: "text-embedding-ada-002", 130 | }), 131 | maxResults: 5, 132 | similarityThreshold: 0.2, 133 | filter: (indexVoice) => 134 | indexVoice.provider === "elevenlabs" && 135 | (["M", "F"].includes(voiceDescription.gender) 136 | ? indexVoice.gender === voiceDescription.gender 137 | : true), 138 | }), 139 | voiceDescription.description, 140 | { functionId: "retrieve-voice" } 141 | ); 142 | 143 | const unavailableVoices = Array.from(this.speakerToVoice.values()).map( 144 | (voice) => `${voice.provider}:${voice.voiceId}` 145 | ); 146 | 147 | const voice = potentialVoices.find( 148 | (voice) => 149 | !unavailableVoices.includes(`${voice.provider}:${voice.voiceId}`) 150 | ); 151 | 152 | if (!voice) { 153 | throw new Error(`No voice found for ${speaker}`); 154 | } 155 | 156 | return voice; 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/storyteller/prepareVoices.script.ts: -------------------------------------------------------------------------------- 1 | import dotenv from "dotenv"; 2 | import fs from "fs/promises"; 3 | import { MemoryVectorIndex, openai, upsertIntoVectorIndex } from "modelfusion"; 4 | import { z } from "zod"; 5 | import { Voice } from "./VoiceManager"; 6 | 7 | dotenv.config(); 8 | /* Script entry point: builds an in-memory vector index from the LMNT and ElevenLabs voice lists and writes its serialized form to ./data/voices.index.json. */ 9 | async function main() { 10 | try { 11 | const vectorIndex = new MemoryVectorIndex(); 12 | 13 | await addLmntVoices(vectorIndex); 14 | await addElevenLabsVoices(vectorIndex); 15 | 16 | await fs.writeFile("./data/voices.index.json", vectorIndex.serialize()); 17 | } catch (err) { 18 | /* any step (reading, embedding, writing) can fail, so report generically and mark the process as failed */ console.error("Failed to prepare voice index", err); process.exitCode = 1; 19 | } 20 | } 21 | 22 | const lmntVoiceSchema = z.object({ 23 | id: z.string(), 24 | name: z.string(), 25 | gender:
z.enum(["M", "F"]), 26 | tags: z.array(z.string()), 27 | description: z.string(), 28 | }); 29 | 30 | type LmntVoice = z.infer; 31 | /* Reads ./data/voices.lmnt.json, maps every LMNT voice to a Voice record (description = gender sentence + tags + original description) and upserts the records into the index, embedding the description text. */ 32 | async function addLmntVoices(vectorIndex: MemoryVectorIndex) { 33 | const data = await fs.readFile("./data/voices.lmnt.json", "utf8"); 34 | const lmntVoices: LmntVoice[] = Object.values(JSON.parse(data).voices); 35 | 36 | const voices: Voice[] = lmntVoices.map((voice) => ({ 37 | voiceId: voice.id, 38 | name: voice.name, 39 | provider: "lmnt", 40 | gender: voice.gender, 41 | description: 42 | (voice.gender === "M" ? "Male voice. " : "Female voice. ") + 43 | voice.tags.join(" ") + 44 | ". " + 45 | voice.description, 46 | })); 47 | 48 | await upsertIntoVectorIndex({ 49 | vectorIndex, 50 | embeddingModel: openai.TextEmbedder({ model: "text-embedding-ada-002" }), 51 | objects: voices, 52 | getValueToEmbed: (voice) => voice.description, 53 | }); 54 | } 55 | 56 | const elevenLabsVoiceSchema = z.object({ 57 | voice_id: z.string(), 58 | name: z.string(), 59 | labels: z.record(z.string()), 60 | }); 61 | 62 | type ElevenLabsVoice = z.infer; 63 | /* Reads ./data/voices.11labs.json, keeps only the voices labelled age "young", maps them to Voice records (description = comma-separated "key: value" labels) and upserts them into the index, embedding the description text. */ 64 | async function addElevenLabsVoices(vectorIndex: MemoryVectorIndex) { 65 | const data = await fs.readFile("./data/voices.11labs.json", "utf8"); 66 | const elevenLabsVoices: ElevenLabsVoice[] = Object.values( 67 | JSON.parse(data).voices 68 | ); 69 | 70 | const voices: Voice[] = elevenLabsVoices 71 | .filter((voice) => voice.labels.age === "young") 72 | .map((voice) => ({ 73 | voiceId: voice.voice_id, 74 | name: voice.name, 75 | provider: "elevenlabs", 76 | gender: voice.labels.gender === "female" ?
"F" : "M", 77 | description: Object.entries(voice.labels) 78 | .map(([key, value]) => `${key}: ${value}`) 79 | .join(", "), 80 | })); 81 | 82 | await upsertIntoVectorIndex({ 83 | vectorIndex, 84 | embeddingModel: openai.TextEmbedder({ model: "text-embedding-ada-002" }), 85 | objects: voices, 86 | getValueToEmbed: (voice) => voice.description, 87 | }); 88 | } 89 | 90 | main(); 91 | -------------------------------------------------------------------------------- /src/storyteller/server.ts: -------------------------------------------------------------------------------- 1 | import cors from "@fastify/cors"; 2 | import fastifyStatic from "@fastify/static"; 3 | import dotenv from "dotenv"; 4 | import Fastify from "fastify"; 5 | import { modelfusion } from "modelfusion"; 6 | import { 7 | FileSystemAssetStorage, 8 | FileSystemLogger, 9 | modelFusionFastifyPlugin, 10 | } from "modelfusion-experimental/fastify-server"; 11 | import path from "node:path"; 12 | import { storyTellerFlow } from "./storyTellerFlow"; 13 | 14 | dotenv.config(); 15 | 16 | modelfusion.setLogFormat("basic-text"); 17 | /* Server settings read from the environment, each with a local-development default; PORT is parsed with an explicit radix so it is always read as decimal. */ 18 | const port = process.env.PORT ? parseInt(process.env.PORT, 10) : 3001; 19 | const host = process.env.HOST ?? "localhost"; 20 | const baseUrl = process.env.BASE_URL ?? `http://${host}:${port}`; 21 | const fsBasePath = process.env.BASE_PATH ??
"runs"; 22 | 23 | export async function main() { 24 | try { 25 | const fastify = Fastify(); 26 | 27 | await fastify.register(cors, {}); 28 | await fastify.register(fastifyStatic, { 29 | root: path.join(__dirname, "..", "..", "out"), 30 | prefix: "/", 31 | }); 32 | 33 | const logger = new FileSystemLogger({ 34 | path: (run) => path.join(fsBasePath, run.runId, "logs"), 35 | }); 36 | 37 | const assetStorage = new FileSystemAssetStorage({ 38 | path: (run) => path.join(fsBasePath, run.runId, "assets"), 39 | logger, 40 | }); 41 | 42 | fastify.register(modelFusionFastifyPlugin, { 43 | baseUrl, 44 | basePath: "/generate-story", 45 | flow: storyTellerFlow, 46 | logger, 47 | assetStorage, 48 | }); 49 | 50 | console.log(`Starting server on port ${port}...`); 51 | await fastify.listen({ port, host }); 52 | console.log("Server started"); 53 | } catch (error) { 54 | console.error("Failed to start server"); 55 | console.error(error); 56 | process.exit(1); 57 | } 58 | } 59 | 60 | main(); 61 | -------------------------------------------------------------------------------- /src/storyteller/storyTellerFlow.ts: -------------------------------------------------------------------------------- 1 | import { 2 | generateImage, 3 | generateSpeech, 4 | generateText, 5 | generateTranscription, 6 | openai, 7 | stability, 8 | streamObject, 9 | zodSchema, 10 | } from "modelfusion"; 11 | import { DefaultFlow } from "modelfusion-experimental/fastify-server"; 12 | import { z } from "zod"; 13 | import { VoiceManager } from "./VoiceManager"; 14 | import { storytellerSchema } from "./storytellerSchema"; 15 | 16 | export const storyTellerFlow = new DefaultFlow({ 17 | schema: storytellerSchema, 18 | async process({ input: { mimeType, audioData }, run }) { 19 | // Transcribe the user voice input: 20 | const transcription = await generateTranscription({ 21 | functionId: "transcribe", 22 | model: openai.Transcriber({ model: "whisper-1" }), 23 | mimeType, 24 | audioData, 25 | }); 26 | 27 | 
run.publishEvent({ type: "transcribed-input", input: transcription }); 28 | 29 | // Generate a story based on the transcription: 30 | const story = await generateText({ 31 | functionId: "generate-story", 32 | model: openai.CompletionTextGenerator({ 33 | model: "gpt-3.5-turbo-instruct", 34 | temperature: 1.2, 35 | maxGenerationTokens: 1000, 36 | }), 37 | prompt: 38 | "Generate a story aimed at preschoolers on the following topic: \n" + 39 | `'${transcription}'.`, 40 | }); 41 | 42 | // Run in parallel: 43 | await Promise.allSettled([ 44 | // Generate title: 45 | (async () => { 46 | const title = await generateText({ 47 | functionId: "generate-title", 48 | model: openai.CompletionTextGenerator({ 49 | model: "gpt-3.5-turbo-instruct", 50 | temperature: 0.7, 51 | maxGenerationTokens: 200, 52 | stopSequences: ['"'], 53 | }), 54 | prompt: 55 | "Generate a short title for the following story for pre-school children: \n\n" + 56 | `'${story}'.\n\n` + 57 | 'Title: "', 58 | }); 59 | 60 | run.publishEvent({ type: "generated-title", title }); 61 | })(), 62 | 63 | // Generate image that represents story: 64 | (async () => { 65 | const imagePrompt = await generateText({ 66 | functionId: "generate-story-image-prompt", 67 | model: openai 68 | .ChatTextGenerator({ 69 | model: "gpt-4", 70 | temperature: 0, 71 | maxGenerationTokens: 500, 72 | }) 73 | .withTextPrompt(), 74 | prompt: 75 | "Generate a short image generation prompt " + 76 | "(only abstract keywords, max 8 keywords) for the following story: " + 77 | story, 78 | }); 79 | 80 | const storyImage = await generateImage({ 81 | functionId: "generate-story-image", 82 | model: stability 83 | .ImageGenerator({ 84 | model: "stable-diffusion-xl-1024-v1-0", 85 | cfgScale: 7, 86 | height: 1024, 87 | width: 1024, 88 | steps: 30, 89 | }) 90 | .withTextPrompt(), 91 | prompt: `${imagePrompt} style of colorful illustration for a preschooler story`, 92 | }); 93 | 94 | const imagePath = await run.storeBinaryAsset({ 95 | name: "story.png", 96 | 
data: Buffer.from(storyImage), 97 | contentType: "image/png", 98 | }); 99 | 100 | run.publishEvent({ type: "generated-image", url: imagePath }); 101 | })(), 102 | 103 | // expand and narrate story: 104 | (async () => { 105 | const voiceManager = await VoiceManager.fromFile({ 106 | voicesPath: "./data/voices.index.json", 107 | narrator: { 108 | voiceId: "c8ea4f2a-06e6-4d7b-9484-db941bf7c657", 109 | name: "Joe", 110 | provider: "lmnt", 111 | gender: "M", 112 | description: "Male voice. Middle-aged.", 113 | }, 114 | }); 115 | 116 | const narratedStoryPartSchema = z.object({ 117 | type: z 118 | .enum(["narration", "dialogue"]) 119 | .describe("Type of story part. Either 'narration' or 'dialogue'."), 120 | speaker: z 121 | .string() 122 | .describe( 123 | "Speaker of a dialogue (direct speech) part. Must be a single speaker." 124 | ), 125 | content: z.string().describe("Content of the story part"), 126 | }); 127 | 128 | type NarratedStoryPart = z.infer; 129 | 130 | const structuredStorySchema = z.object({ 131 | parts: z.array(narratedStoryPartSchema), 132 | }); 133 | 134 | const processedParts: Array = []; 135 | 136 | const { objectStream: audioStoryStream, objectPromise } = 137 | await streamObject({ 138 | functionId: "generate-audio-story", 139 | model: openai 140 | .ChatTextGenerator({ 141 | model: "gpt-4", 142 | temperature: 0, 143 | }) 144 | .asFunctionCallObjectGenerationModel({ 145 | fnName: "story", 146 | fnDescription: "Kids story with narration.", 147 | }) 148 | .withTextPrompt(), 149 | schema: zodSchema(structuredStorySchema), 150 | prompt: [ 151 | "Expand the following story into a longer, narrated audio story for preschoolers.", 152 | "", 153 | "The audio story should include interesting dialogue by the main characters.", 154 | "The language should be understandable by a preschooler.", 155 | "", 156 | "Add details and dialog to make the story parts longer.", 157 | "Add the speaker to each dialogue part. 
A dialogue part can only have one speaker.", 158 | "There must only be one narrator.", 159 | "Each spoken part must be a dialogue part with a speaker.", 160 | "", 161 | "Story:", 162 | story, 163 | ].join("\n"), 164 | fullResponse: true, 165 | }); 166 | 167 | for await (const { partialObject } of audioStoryStream) { 168 | if (partialObject.parts == null) { 169 | continue; 170 | } 171 | 172 | // the last story part might not be complete yet: 173 | const partialParts = partialObject.parts.slice(0, -1); 174 | 175 | // ensure that the remaining story parts are complete: 176 | const partialPartsParseResult = z 177 | .array(narratedStoryPartSchema) 178 | .safeParse(partialParts); 179 | 180 | if (partialPartsParseResult.success) { 181 | await processNewParts(partialPartsParseResult.data); 182 | } 183 | } 184 | 185 | // process the remaining parts: 186 | const audioStory = await objectPromise; 187 | await processNewParts(audioStory.parts); 188 | 189 | async function processNewParts(parts: NarratedStoryPart[]) { 190 | const newParts = parts.slice(processedParts.length); 191 | processedParts.push(...newParts); 192 | 193 | for (const part of newParts) { 194 | const index = processedParts.indexOf(part); 195 | const speaker = part.speaker; 196 | 197 | const narrationAudio = await generateSpeech({ 198 | functionId: "narrate-story-part", 199 | model: await voiceManager.getSpeechModel({ speaker, story }), 200 | text: part.content, 201 | }); 202 | 203 | const path = await run.storeBinaryAsset({ 204 | name: `story-part-${index}.mp3`, 205 | data: Buffer.from(narrationAudio), 206 | contentType: "audio/mpeg", 207 | }); 208 | 209 | run.publishEvent({ 210 | type: "generated-audio-part", 211 | index, 212 | url: path, 213 | }); 214 | } 215 | } 216 | })(), 217 | ]); 218 | }, 219 | }); 220 | -------------------------------------------------------------------------------- /src/storyteller/storytellerSchema.ts: -------------------------------------------------------------------------------- 1 
| import { z } from "zod"; 2 | /** Schema handed to both the browser client (invokeFlow) and the server flow (DefaultFlow): `input` validates the request payload, `events` validates every event streamed back, discriminated by `type`. */ 3 | export const storytellerSchema = { 4 | input: z.object({ 5 | mimeType: z.string(), /* MIME type reported by the recorder */ 6 | audioData: z.string(), /* recorded audio, base64-encoded by the client */ 7 | }), 8 | events: z.discriminatedUnion("type", [ 9 | z.object({ 10 | type: z.literal("transcribed-input"), 11 | input: z.string(), /* transcription of the spoken request */ 12 | }), 13 | z.object({ 14 | type: z.literal("generated-title"), 15 | title: z.string(), 16 | }), 17 | z.object({ 18 | type: z.literal("generated-image"), 19 | url: z.string(), /* location returned when the image asset was stored */ 20 | }), 21 | z.object({ 22 | type: z.literal("generated-audio-part"), 23 | index: z.number(), /* 0-based position of the part within the story */ 24 | url: z.string(), /* location returned when the audio asset was stored */ 25 | }), 26 | ]), 27 | }; 28 | -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | darkMode: ["class"], 4 | content: [ 5 | './pages/**/*.{ts,tsx}', 6 | './components/**/*.{ts,tsx}', 7 | './app/**/*.{ts,tsx}', 8 | './src/**/*.{ts,tsx}', 9 | ], 10 | theme: { 11 | container: { 12 | center: true, 13 | padding: "2rem", 14 | screens: { 15 | "2xl": "1400px", 16 | }, 17 | }, 18 | extend: { 19 | colors: { 20 | border: "hsl(var(--border))", 21 | input: "hsl(var(--input))", 22 | ring: "hsl(var(--ring))", 23 | background: "hsl(var(--background))", 24 | foreground: "hsl(var(--foreground))", 25 | primary: { 26 | DEFAULT: "hsl(var(--primary))", 27 | foreground: "hsl(var(--primary-foreground))", 28 | }, 29 | secondary: { 30 | DEFAULT: "hsl(var(--secondary))", 31 | foreground: "hsl(var(--secondary-foreground))", 32 | }, 33 | destructive: { 34 | DEFAULT: "hsl(var(--destructive))", 35 | foreground: "hsl(var(--destructive-foreground))", 36 | }, 37 | muted: { 38 | DEFAULT: "hsl(var(--muted))", 39 | foreground: "hsl(var(--muted-foreground))", 40 | }, 41 | accent: { 42 | DEFAULT: "hsl(var(--accent))", 43 | foreground: "hsl(var(--accent-foreground))", 44 | }, 45 | popover: { 46 | DEFAULT: "hsl(var(--popover))", 47 |
foreground: "hsl(var(--popover-foreground))", 48 | }, 49 | card: { 50 | DEFAULT: "hsl(var(--card))", 51 | foreground: "hsl(var(--card-foreground))", 52 | }, 53 | }, 54 | borderRadius: { 55 | lg: "var(--radius)", 56 | md: "calc(var(--radius) - 2px)", 57 | sm: "calc(var(--radius) - 4px)", 58 | }, 59 | keyframes: { 60 | "accordion-down": { 61 | from: { height: 0 }, 62 | to: { height: "var(--radix-accordion-content-height)" }, 63 | }, 64 | "accordion-up": { 65 | from: { height: "var(--radix-accordion-content-height)" }, 66 | to: { height: 0 }, 67 | }, 68 | }, 69 | animation: { 70 | "accordion-down": "accordion-down 0.2s ease-out", 71 | "accordion-up": "accordion-up 0.2s ease-out", 72 | }, 73 | }, 74 | }, 75 | plugins: [require("tailwindcss-animate")], 76 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./src/*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | --------------------------------------------------------------------------------