├── README.md ├── synthesis ├── .dockerignore ├── .gitignore ├── Dockerfile ├── package.json ├── src │ ├── config.ts │ ├── controllers │ │ └── synthesisController.ts │ ├── examples │ │ └── llm-paper-summary │ │ │ ├── callServerExample.ts │ │ │ └── index.ts │ ├── flows │ │ ├── formats │ │ │ ├── debate.ts │ │ │ ├── interview.ts │ │ │ └── roundtable.ts │ │ ├── generateHooks.ts │ │ ├── generateScript.ts │ │ ├── index.ts │ │ ├── summarizeSource.ts │ │ └── synthesizeAudio.ts │ ├── middleware │ │ └── errorHandler.ts │ ├── routes │ │ └── synthesis.ts │ ├── schemas │ │ ├── base.ts │ │ ├── formats │ │ │ ├── debate.ts │ │ │ ├── interview.ts │ │ │ └── roundtable.ts │ │ └── podcast.ts │ ├── server.ts │ ├── synthesis.ts │ └── util.ts └── tsconfig.json ├── synthesis_podcast_audio_expert-interview.mp3 └── webapp ├── .gitignore ├── components.json ├── next.config.ts ├── package-lock.json ├── package.json ├── postcss.config.mjs ├── public ├── file.svg ├── globe.svg ├── next.svg ├── vercel.svg └── window.svg ├── src ├── app │ ├── favicon.ico │ ├── firebase.ts │ ├── globals.css │ ├── layout.tsx │ ├── notebooks │ │ ├── [id] │ │ │ ├── NotebookDetail.tsx │ │ │ └── page.tsx │ │ └── page.tsx │ └── page.tsx ├── components │ └── ui │ │ ├── button.tsx │ │ ├── card.tsx │ │ ├── collapsible.tsx │ │ ├── dialog.tsx │ │ ├── input.tsx │ │ ├── label.tsx │ │ ├── select.tsx │ │ ├── sheet.tsx │ │ └── tabs.tsx └── lib │ └── utils.ts ├── tailwind.config.ts └── tsconfig.json /README.md: -------------------------------------------------------------------------------- 1 | # Open-Source NotebookLM Sample 2 | 3 | Build your own NotebookLM-style application using this experimental project as a starting point. Powered by [Genkit](https://genkit.dev). 4 | 5 | Here's an [example](https://github.com/genkit-ai/genkit-notebooklm/raw/refs/heads/main/synthesis_podcast_audio_expert-interview.mp3) - an interview generated from an LLM research paper on L1 regularization. 6 | 7 | ## Quickstart 8 | Here's how you can quickly get started and see your first podcast uploaded to Cloud Storage: 9 | 10 | 1. **Enable Cloud Text-to-Speech API** 11 | - Go to [Google Cloud Console](https://console.cloud.google.com) 12 | - Select your project 13 | - Navigate to "APIs & Services" > "Library" 14 | - Search for "Cloud Text-to-Speech API" 15 | - Click "Enable". It should take you to the details page for this API. 16 | - Navigate to "Credentials" and click on "+ Create Credentials" > "Service Account". 17 | - Use "genkit-notebooklm" for the service account name, no roles, and click "Create & Continue". It should take you back to the API Details page. 18 | - Navigate to the "Service Accounts" section of the page, click on the service account you just created, and click "Keys" > "Add Key" > "Create New Key". 19 | - Download the key in JSON format, and rename it "credentials.json". Put this JSON file in the `synthesis/` directory. 20 | 21 | 22 | 2. **Create a .env file** 23 | ```bash 24 | vim synthesis/.env 25 | ``` 26 | 27 | 3. **Get a Gemini API Key** 28 | - Create a new API Key or copy an existing one from [Google AI Studio](https://aistudio.google.com/app/apikey). 29 | - Paste it into the above .env file: 30 | 31 | ```bash 32 | GOOGLE_API_KEY=xxxxx 33 | ``` 34 | 35 | 4. **Run the test command** 36 | ```bash 37 | cd synthesis 38 | ts-node src/examples/llm-paper-summary/index.ts 39 | ``` 40 | 41 | 5. **Serve the synthesize() method as an Express server** 42 | ```bash 43 | cd synthesis 44 | ts-node src/server.ts 45 | ```
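Once the server is running, you can exercise the endpoint directly. Here's a minimal sketch of such a request - the title, speaker names, and voices are illustrative, and the port assumes the default from `src/server.ts` (8080, overridable via the `PORT` environment variable):

```bash
curl -X POST http://localhost:8080/api/synthesis \
  -H "Content-Type: application/json" \
  -d '{
    "input": "Paste your source text here...",
    "output": [{
      "type": "podcast",
      "options": {
        "format": "interview",
        "title": "my-first-interview",
        "speakers": [
          { "name": "Host", "voiceId": "en-US-Journey-F" },
          { "name": "Guest", "voiceId": "en-US-Journey-D" }
        ]
      }
    }]
  }'
```

The JSON response contains the generated transcript and, when Cloud Storage is configured, a gs:// URL pointing at the rendered audio.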
46 | 47 | ## What's Included? 48 | - **AI-powered synthesis utilities** – Out-of-the-box utilities for converting diverse input sources (PDFs, text, and more coming soon) into structured, consumable audio formats like roundtable discussions, formal debates, and expert interviews. This codebase is meant to be used as a starting point for your own customizable pipelines. 49 | - **Serverless backend (Cloud Run)** – Built to deploy on Cloud Run, so you can easily spin up a serverless API that your frontends can consume. 50 | - **Next.js boilerplate UI** – So that you can build your own NotebookLM-inspired experience. Easily deployable to Firebase App Hosting. 51 | 52 | Get started quickly, customize as needed, and bring AI-powered research synthesis to your own applications. 53 | 54 | ## Who is This For? 55 | 56 | This sample is designed to be a starting point for developers, startups, and researchers looking to integrate AI-powered content synthesis into their applications without building everything from scratch. 57 | 58 | ## Usage 59 | You can easily generate AI-powered podcasts from any text content by configuring the synthesis options. The system is flexible and can handle various podcast formats including: 60 | 61 | 1. One-on-one interviews 62 | 2. Multi-speaker roundtables 63 | 3. Moderated panel discussions 64 | 4. And more... 65 | 66 | To generate a podcast: 67 | 68 | 1. Create a podcast configuration object defining your desired format and speakers (see examples below) 69 | 2. Prepare your input sources. Each source can be either a gs:// URL (pointing to a .pdf or .txt file) or the raw source text as a string. 70 | 3. Adjust the configuration options as needed - this library gives you control over the final generated content format. 71 | 4. Call the synthesis method, as sketched below. 72 |
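Putting those steps together, a minimal call looks like this sketch (the import paths assume a file inside `synthesis/src/`, and `./myConfigs` is a hypothetical module exporting one of the configuration objects shown below; see `src/examples/llm-paper-summary/index.ts` for a complete, runnable version):

```
import { synthesize } from "./synthesis";
import { SynthesisRequest } from "./schemas/podcast";
// Hypothetical module holding a config like the examples below
import { simpleInterviewConfig } from "./myConfigs";

async function main() {
  const request: SynthesisRequest = {
    // Raw text and/or gs:// URLs are both accepted as sources
    input: ["Your source text here..."],
    output: [{ type: "podcast", options: simpleInterviewConfig }],
  };
  const result = await synthesize(request);
  console.log(result.podcast?.storageUrl);
}

main();
```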
James Wilson", 136 | topic: "Future of AI Technology", 137 | maxQuestions: 5, 138 | bucketName: "your-storage-bucke.firebasestorage.app", 139 | transcriptStorage: "transcripts", 140 | audioStorage: "audio" 141 | }; 142 | ``` 143 | This example shows an interview format where a tech reporter interviews an AI researcher. 144 | 145 | Here's a debate format example that structures a discussion between experts with opposing views: 146 | 147 | ``` 148 | export const ethicalDebateConfig = { 149 | format: "debate", 150 | title: "ethical-debate", 151 | speakers: [ 152 | { 153 | name: "Professor Smith", 154 | voiceId: "en-US-Journey-D", 155 | background: "AI Safety Expert at Oxford" 156 | }, 157 | { 158 | name: "Dr. Zhang", 159 | voiceId: "en-US-Neural2-D", 160 | background: "AI Development Lead at OpenAI" 161 | } 162 | ], 163 | debateTopic: "AI Safety vs Innovation Speed", 164 | debateStructure: "formal", 165 | numRounds: 3, 166 | moderator: { 167 | name: "Rachel Adams", 168 | voiceId: "en-US-Journey-F", 169 | style: "neutral", 170 | openingRemarks: true, 171 | closingRemarks: true 172 | }, 173 | sides: [ 174 | { 175 | sideName: "Safety First", 176 | speakers: ["Professor Smith"], 177 | keyPoints: [ 178 | "We must implement rigorous testing protocols before deploying AI systems", 179 | "AI alignment and value learning need to be solved before scaling capabilities", 180 | "Historical examples show rushing technology leads to unintended consequences" 181 | ] 182 | }, 183 | { 184 | sideName: "Innovation Priority", 185 | speakers: ["Dr. Zhang"], 186 | keyPoints: [ 187 | "Slowing AI progress cedes ground to less responsible actors", 188 | "AI can solve urgent challenges in healthcare, climate change, and poverty", 189 | "Robust AI development processes already exist - we need execution not delay" 190 | ] 191 | } 192 | ], 193 | bucketName: "your-storage-bucket.firebasestorage.app", 194 | transcriptStorage: "transcripts", 195 | audioStorage: "audio" 196 | }; 197 | ``` 198 | > **Note**: For detailed configuration schemas and options for each podcast format, see the TypeScript interfaces in `src/schemas/*.ts` 199 | 200 | ## OPTIONAL: Demo Web App & Firebase Integrations 201 | We bundled a demo web app that you can use to make your own, custom version of NotebookLM! To use it, you'll need to set up the following: 202 | 203 | ### Firebase Configurations 204 | `synthesis/src/config.ts` has `USE_FIRESTORE` and `USE_STORAGE` configurations. 205 | 206 | 1. `USE_FIRESTORE`: 207 | - If turned on, the synthesize() method will store job metadata inside Firestore. Information such as generated transcript, discussion hooks, etc. are included as metadata. 208 | - The current podcast generation step is reported in the job metadata and updated while podcast generation is in progress. This helps you support frontends that show interactive status updates. 209 | - (Note: The included web app doesn't use a background job architecture, thus it doesn't show the status updates in real time. That requires an additional dependency on Cloud Tasks or other queueing system, and we wanted to keep this sample easily runnable locally.) 210 | 211 | 2. `USE_STORAGE`: 212 | - If turned on, the synthesize() method will upload the generated podcast to Cloud Storage. The specific location can be defined via the podcast options. 213 | - If turned on, the server can also accept gs:// URLs as sources. If the files at those source bucket locations are either .pdf or .txt format, they will be included in the generated podcast. 
214 | 215 | ### Web App 216 | You can cd into the `webapp/` folder. The easiest way to test it out is to run your Express server locally, and point the web app to the local URL. To do that, follow these steps: 217 | 218 | 1. Update `webapp/config.ts` to include the correct `firebaseConfig` values from the Firebase Console. 219 | 2. Create a `webapp/.env` file and include `BACKEND_HOST=localhost:3000` (or whatever port you've configured your Express server to run on). 220 | -------------------------------------------------------------------------------- /synthesis/.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log 3 | .env 4 | .env.local 5 | .git 6 | .gitignore 7 | .dockerignore 8 | Dockerfile 9 | README.md 10 | credentials.json 11 | -------------------------------------------------------------------------------- /synthesis/.gitignore: -------------------------------------------------------------------------------- 1 | # Credentials 2 | credentials.json 3 | 4 | # Dependencies 5 | node_modules/ 6 | package-lock.json 7 | yarn.lock 8 | 9 | # Build outputs 10 | dist/ 11 | build/ 12 | lib/ 13 | 14 | # Environment variables 15 | .env 16 | .env.local 17 | .env.*.local 18 | 19 | # IDE and editor files 20 | .idea/ 21 | .vscode/ 22 | *.swp 23 | *.swo 24 | .DS_Store 25 | 26 | # Logs 27 | logs/ 28 | *.log 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | 33 | # Test coverage 34 | coverage/ 35 | 36 | # Temporary files 37 | tmp/ 38 | temp/ 39 | 40 | # PDF files (since your example includes PDF processing) 41 | *.pdf 42 | 43 | # Generated files 44 | *.generated.* 45 | -------------------------------------------------------------------------------- /synthesis/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-slim 2 | 3 | WORKDIR /app 4 | 5 | # Copy everything at once 6 | COPY . .
7 | 8 | # Install and build 9 | RUN npm install 10 | RUN npm run build 11 | 12 | # Cloud Run will use PORT environment variable 13 | EXPOSE 8080 14 | 15 | CMD [ "npm", "start" ] -------------------------------------------------------------------------------- /synthesis/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "synthesis", 3 | "version": "1.0.0", 4 | "main": "dist/index.js", 5 | "types": "dist/index.d.ts", 6 | "scripts": { 7 | "build": "tsc", 8 | "clean": "rimraf dist", 9 | "prepare": "npm run build", 10 | "start": "node dist/server.js" 11 | }, 12 | "dependencies": { 13 | "@genkit-ai/googleai": "^1.0.4", 14 | "@google-cloud/text-to-speech": "^5.8.0", 15 | "cors": "^2.8.5", 16 | "dotenv": "^16.4.7", 17 | "firebase-admin": "^13.1.0", 18 | "fluent-ffmpeg": "^2.1.3", 19 | "genkit": "^1.0.0", 20 | "helmet": "^8.0.0", 21 | "pdf-parse": "^1.1.1", 22 | "uuid": "^9.0.0", 23 | "zod": "^3.24.2" 24 | }, 25 | "devDependencies": { 26 | "@types/cors": "^2.8.17", 27 | "@types/fluent-ffmpeg": "^2.1.27", 28 | "@types/pdf-parse": "^1.1.4", 29 | "@types/uuid": "^9.0.0", 30 | "rimraf": "^5.0.0", 31 | "typescript": "^5.3.3" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /synthesis/src/config.ts: -------------------------------------------------------------------------------- 1 | import * as admin from 'firebase-admin'; 2 | import textToSpeech, { TextToSpeechClient } from "@google-cloud/text-to-speech"; 3 | import { genkit } from "genkit"; 4 | import { googleAI } from "@genkit-ai/googleai"; 5 | import { gemini15Flash } from "@genkit-ai/googleai"; 6 | import * as dotenv from 'dotenv'; 7 | import fs from 'fs'; 8 | 9 | dotenv.config(); 10 | 11 | export const USE_CLOUD_STORAGE = true; 12 | export const USE_FIRESTORE = true; 13 | 14 | let db: admin.firestore.Firestore | null; 15 | let firebaseAdmin: admin.app.App | null; 16 | let storage: admin.storage.Storage | null; 17 | let authConfig: admin.AppOptions; 18 | 19 | if (process.env.NODE_ENV === 'production') { 20 | authConfig = { 21 | credential: admin.credential.applicationDefault() 22 | }; 23 | } else { 24 | const credentials = JSON.parse(fs.readFileSync('credentials.json', 'utf-8')); 25 | authConfig = { 26 | credential: admin.credential.cert(credentials) 27 | }; 28 | } 29 | 30 | if (USE_CLOUD_STORAGE || USE_FIRESTORE) { 31 | if (!admin.apps.length) { 32 | admin.initializeApp(authConfig); 33 | } 34 | firebaseAdmin = admin.app(); 35 | } else { 36 | firebaseAdmin = null; 37 | } 38 | 39 | if (USE_CLOUD_STORAGE && !!firebaseAdmin) { 40 | storage = firebaseAdmin.storage(); 41 | } else { 42 | storage = null; 43 | } 44 | 45 | if (USE_FIRESTORE && !!firebaseAdmin) { 46 | db = firebaseAdmin.firestore(); 47 | } else { 48 | db = null; 49 | } 50 | 51 | let ttsCredentials; 52 | let tts: TextToSpeechClient; 53 | 54 | if (process.env.NODE_ENV === 'production') { 55 | tts = new textToSpeech.TextToSpeechClient(); 56 | } else { 57 | ttsCredentials = JSON.parse(fs.readFileSync('credentials.json', 'utf-8')); 58 | tts = new textToSpeech.TextToSpeechClient({ 59 | credentials: ttsCredentials 60 | }); 61 | } 62 | 63 | export const JOBS_COLLECTION = "jobs"; 64 | 65 | export { firebaseAdmin, storage, db, tts }; 66 | 67 | export const ai = genkit({ 68 | plugins: [googleAI()], 69 | model: gemini15Flash, 70 | }); 71 | -------------------------------------------------------------------------------- /synthesis/src/controllers/synthesisController.ts: 
-------------------------------------------------------------------------------- 1 | import { Request, Response, NextFunction } from 'express'; 2 | import { synthesize } from '../synthesis'; 3 | import { SynthesisRequest } from '../schemas/podcast'; 4 | 5 | export const synthesisController = { 6 | synthesize: async (req: Request, res: Response, next: NextFunction) => { 7 | try { 8 | const synthesisRequest: SynthesisRequest = req.body; 9 | 10 | try { 11 | // Run the synthesis process and wait for the result 12 | const result = await synthesize(synthesisRequest); 13 | // Emit the response of synthesize 14 | return res.json({ status: 'success', result }); 15 | } catch (error: unknown) { 16 | return res.status(500).json({ status: 'error', message: error instanceof Error ? error.message : 'Unknown error occurred' }); 17 | } 18 | } catch (error) { 19 | next(error); 20 | } 21 | } 22 | }; -------------------------------------------------------------------------------- /synthesis/src/examples/llm-paper-summary/callServerExample.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import pdf from 'pdf-parse'; 3 | import path from 'path'; 4 | import axios from 'axios'; 5 | 6 | const dataBuffer = fs.readFileSync(path.join(__dirname, 'regularization.pdf')); 7 | 8 | async function getPdfText() { 9 | const data = await pdf(dataBuffer); 10 | return data.text; 11 | } 12 | 13 | async function loadInput() { 14 | const sourceText = await getPdfText(); 15 | return sourceText; 16 | } 17 | 18 | const url = "http://localhost:3000/api/synthesis"; 19 | // const url = "https://<your-cloud-run-url>/api/synthesis"; 20 | 21 | async function main() { 22 | const inputText = await loadInput(); 23 | const expertInterviewConfig = { 24 | format: "interview", 25 | title: "expert-interview", 26 | speakers: [ 27 | { 28 | name: "Dr. Mahsa Taheri", 29 | voiceId: "en-US-Journey-D", 30 | background: "AI Researcher at University of Hamburg" 31 | }, 32 | { 33 | name: "Sarah Chen", 34 | voiceId: "en-US-Journey-F", 35 | background: "Senior Tech Journalist at TechReview" 36 | } 37 | ], 38 | intervieweeName: "Dr. 
Mahsa Taheri", 39 | topic: "L1 Regularization Breakthroughs", 40 | maxQuestions: 24, 41 | bucketName: "xxxx.firebasestorage.app", 42 | transcriptStorage: "transcripts", 43 | audioStorage: "audio" 44 | }; 45 | const req = { 46 | jobId: 'llm-paper-summary', 47 | input: inputText, 48 | output: [{type: "podcast", options: expertInterviewConfig}] 49 | }; 50 | 51 | try { 52 | const response = await axios.post(url, req, { 53 | headers: { 54 | 'Content-Type': 'application/json' 55 | } 56 | }); 57 | 58 | console.log(response.data); 59 | } catch (error) { 60 | console.error('Error:', error); 61 | } 62 | } 63 | 64 | main(); 65 | -------------------------------------------------------------------------------- /synthesis/src/examples/llm-paper-summary/index.ts: -------------------------------------------------------------------------------- 1 | import fs from 'fs'; 2 | import pdf from 'pdf-parse'; 3 | import path from 'path'; 4 | import { synthesize } from '../../synthesis'; 5 | import { InterviewPodcastOptions, SynthesisRequest } from '../../schemas/podcast'; 6 | 7 | const dataBuffer = fs.readFileSync(path.join(__dirname, 'regularization.pdf')); 8 | 9 | async function getPdfText() { 10 | const data = await pdf(dataBuffer); 11 | return data.text; 12 | } 13 | 14 | async function loadInput() { 15 | const sourceText = await getPdfText(); 16 | return sourceText; 17 | } 18 | 19 | async function main() { 20 | const inputText = await loadInput(); 21 | const expertInterviewConfig: InterviewPodcastOptions = { 22 | format: "interview", 23 | title: "expert-interview", 24 | speakers: [ 25 | { 26 | name: "Dr. James Wilson", 27 | voiceId: "en-US-Journey-D", 28 | background: "AI Research Lead at Stanford" 29 | }, 30 | { 31 | name: "Sarah Chen", 32 | voiceId: "en-US-Journey-F", 33 | background: "Senior Tech Journalist at TechReview" 34 | } 35 | ], 36 | intervieweeName: "Dr. James Wilson", 37 | topic: "Latest Breakthroughs in AI Research", 38 | maxQuestions: 8, 39 | bucketName: "xxxx.firebasestorage.app", 40 | transcriptStorage: "transcripts", 41 | audioStorage: "audio" 42 | }; 43 | const req: SynthesisRequest = { 44 | input: inputText, 45 | output: [{type: "podcast", options: expertInterviewConfig}] 46 | } 47 | const result = await synthesize(req); 48 | console.log(result); 49 | } 50 | 51 | main(); 52 | 53 | 54 | -------------------------------------------------------------------------------- /synthesis/src/flows/formats/debate.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai } from "../../config"; 3 | import { gemini15Flash } from "@genkit-ai/googleai"; 4 | import { debatePodcastOptionsSchema } from "../../schemas/formats/debate"; 5 | 6 | const finalPodcastScriptInputSchema = z.object({ 7 | summary: z.string(), 8 | hooks: z.array(z.string()), 9 | options: debatePodcastOptionsSchema 10 | }); 11 | 12 | const finalPodcastScriptOutputSchema = z.object({ 13 | script: z.array( 14 | z.object({ 15 | speaker: z.string(), 16 | text: z.string(), 17 | }) 18 | ), 19 | }); 20 | 21 | export const debatePodcastScriptFlow = ai.defineFlow( 22 | { 23 | name: "debatePodcastScriptFlow", 24 | inputSchema: finalPodcastScriptInputSchema, 25 | outputSchema: finalPodcastScriptOutputSchema, 26 | }, 27 | async (inputValues: z.infer) => { 28 | const { summary, hooks, options } = inputValues; 29 | 30 | const speakerIntros = options.speakers.map((speaker: { name: string; background?: string }) => 31 | speaker.background ? 
`${speaker.name} (${speaker.background})` : 33 | `${speaker.name}` 34 | ).join(', '); 35 | 36 | const prompt = ` 37 | Create a debate-style podcast script featuring these speakers: 38 | ${speakerIntros} 39 | 40 | ${options.debateTopic ? 41 | `The debate topic is: ${options.debateTopic}` : 42 | 'The debate topic should be inferred from the input content.'} 43 | 44 | The script should: 45 | - Include clear opening statements from each side 46 | - Feature structured rebuttals and counter-arguments 47 | - Use direct quotes and evidence to support positions 48 | - Maintain a respectful but passionate tone 49 | - Return a valid JSON array (speaker + lines) 50 | 51 | ${options.debateStructure === 'formal' ? 52 | 'Structure this as a formal debate with clear rounds and timed responses.' : 53 | 'Structure this as an open debate format with natural back-and-forth exchanges.'} 54 | 55 | ${options.moderator ? 56 | `Include ${options.moderator.name} as a ${options.moderator.style} moderator to guide the debate${ 57 | options.moderator.openingRemarks ? ', starting with opening remarks' : '' 58 | }${ 59 | options.moderator.closingRemarks ? ' and ending with closing remarks' : '' 60 | }.` : 61 | 'Allow the debate to flow naturally between speakers with minimal moderation.'} 62 | 63 | ${options.sides ? 64 | `The debate sides are:\n${options.sides.map((side: { sideName: string; speakers: string[]; description?: string; keyPoints?: string[] }) => 65 | `- ${side.sideName}: ${side.speakers.join(', ')}${ 66 | side.description ? `\n  Description: ${side.description}` : '' 67 | }${ 68 | side.keyPoints ? `\n  Key Points: ${side.keyPoints.join(', ')}` : '' 69 | }` 70 | ).join('\n')}` : 71 | 'Assign speakers to opposing sides based on the content and their backgrounds.'} 72 | 73 | The script should be based on the following input sources (summarized below): 74 | ====== BEGIN SUMMARY ====== 75 | ${summary} 76 | ====== END SUMMARY ====== 77 | 78 | These are some conversational hooks that you can use for inspiration to develop the script: 79 | ====== BEGIN HOOKS ====== 80 | ${hooks.join("\n")} 81 | ====== END HOOKS ====== 82 | `; 83 | 84 | const scriptResponse = await ai.generate({ 85 | model: gemini15Flash, 86 | prompt, 87 | config: { temperature: 0.8 }, 88 | output: { schema: finalPodcastScriptOutputSchema }, 89 | }); 90 | 91 | const script = scriptResponse.output?.script || []; 92 | return { script }; 93 | } 94 | ); -------------------------------------------------------------------------------- /synthesis/src/flows/formats/interview.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai } from "../../config"; 3 | import { gemini15Flash } from "@genkit-ai/googleai"; 4 | import { interviewPodcastOptionsSchema } from "../../schemas/formats/interview"; 5 | 6 | 7 | const finalPodcastScriptInputSchema = z.object({ 8 | summary: z.string(), 9 | hooks: z.array(z.string()), 10 | options: interviewPodcastOptionsSchema 11 | }); 12 | 13 | const finalPodcastScriptOutputSchema = z.object({ 14 | script: z.array( 15 | z.object({ 16 | speaker: z.string(), 17 | text: z.string(), 18 | }) 19 | ), 20 | }); 21 | 22 | export const interviewPodcastScriptFlow = ai.defineFlow( 23 | { 24 | name: "interviewPodcastScriptFlow", 25 | inputSchema: finalPodcastScriptInputSchema, 26 | outputSchema: finalPodcastScriptOutputSchema, 27 | }, 28 | async (inputValues: z.infer<typeof finalPodcastScriptInputSchema>) => { 29 | const { summary, hooks, options } = inputValues; 30 | 31 | const speakerIntros = 
options.speakers.map((speaker: { name: string; background?: string }) => 32 | speaker.background ? 33 | `${speaker.name} (${speaker.background})` : 34 | `${speaker.name}` 35 | ).join(', '); 36 | 37 | const prompt = ` 38 | Create an interview-style podcast script featuring these speakers: 39 | ${speakerIntros} 40 | 41 | ${options.intervieweeName ? 42 | `The main interviewee is: ${options.intervieweeName}` : 43 | 'Select the most relevant speaker as the interviewee based on the content.'} 44 | 45 | ${options.topic ? 46 | `The interview topic is: ${options.topic}` : 47 | 'The interview topic should be inferred from the input content.'} 48 | 49 | The script should: 50 | - Include thoughtful questions and detailed responses 51 | - Use direct quotes and specific examples 52 | - Create natural conversation flow 53 | - Balance depth with accessibility 54 | - Return a valid JSON array (speaker + lines) 55 | ${options.rotatingInterviewers ? 56 | 'Multiple interviewers should take turns asking questions.' : 57 | 'The first listed host should be the primary interviewer.'} 58 | 59 | ${options.maxQuestions ? 60 | `Include approximately ${options.maxQuestions} main questions.` : 61 | 'Include approximately 10 main questions in the interview.'} 62 | 63 | The script should be based on the following input sources (summarized below): 64 | ====== BEGIN SUMMARY ====== 65 | ${summary} 66 | ====== END SUMMARY ====== 67 | 68 | These are some conversational hooks that you can use for inspiration to develop the script: 69 | ====== BEGIN HOOKS ====== 70 | ${hooks.join("\n")} 71 | ====== END HOOKS ====== 72 | `; 73 | 74 | const scriptResponse = await ai.generate({ 75 | model: gemini15Flash, 76 | prompt, 77 | config: { temperature: 0.8 }, 78 | output: { schema: finalPodcastScriptOutputSchema }, 79 | }); 80 | 81 | const script = scriptResponse.output?.script || []; 82 | return { script }; 83 | } 84 | ); -------------------------------------------------------------------------------- /synthesis/src/flows/formats/roundtable.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai } from "../../config"; 3 | import { gemini15Flash } from "@genkit-ai/googleai"; 4 | import { roundtablePodcastOptionsSchema } from "../../schemas/formats/roundtable"; 5 | 6 | const finalPodcastScriptInputSchema = z.object({ 7 | summary: z.string(), 8 | hooks: z.array(z.string()), 9 | options: roundtablePodcastOptionsSchema, 10 | }); 11 | 12 | const finalPodcastScriptOutputSchema = z.object({ 13 | script: z.array( 14 | z.object({ 15 | speaker: z.string(), 16 | text: z.string(), 17 | }) 18 | ), 19 | }); 20 | 21 | export const roundtablePodcastScriptFlow = ai.defineFlow( 22 | { 23 | name: "roundtablePodcastScriptFlow", 24 | inputSchema: finalPodcastScriptInputSchema, 25 | outputSchema: finalPodcastScriptOutputSchema, 26 | }, 27 | async (inputValues: z.infer<typeof finalPodcastScriptInputSchema>) => { 28 | const { summary, hooks, options } = inputValues; 29 | const discussionStyleDescriptions: Record<string, string> = { 30 | expert_panel: "In-depth discussion with domain experts", 31 | founders_chat: "Candid discussions between startup founders", 32 | trend_analysis: "Discussion focused on analyzing current trends", 33 | industry_roundtable: "Professionals discussing an industry challenge", 34 | brainstorm_session: "Free-flowing discussion of ideas & problem-solving", 35 | }; 36 | 37 | let discussionStyleDescription = ""; 38 | // Support custom discussion style as well 39 | if (options.discussionStyle && 
!discussionStyleDescriptions[options.discussionStyle]) { 40 | discussionStyleDescription = options.discussionStyle; 41 | } else { 42 | discussionStyleDescription = discussionStyleDescriptions[options.discussionStyle || "expert_panel"]; 43 | } 44 | 45 | const speakerIntros = options.speakers.map((speaker: { name: string; background?: string }) => 46 | speaker.background ? 47 | `${speaker.name} (${speaker.background})` : 48 | `${speaker.name}` 49 | ).join(', '); 50 | 51 | const prompt = ` 52 | Create a ${discussionStyleDescription} style roundtable podcast script featuring these speakers: 53 | ${speakerIntros} 54 | 55 | The script should: 56 | - Use at least two direct quotes 57 | - Explain data/points 58 | - Include some debate/disagreement 59 | - Have lighthearted/comedic lines 60 | - Return a valid JSON array (speaker + lines) 61 | 62 | ${options.structure === 'moderated_topics' ? 63 | 'Structure this as a moderated discussion with clear topic transitions.' : 64 | 'Structure this as an open discussion where speakers can naturally interact.'} 65 | 66 | ${options.moderator ? 67 | `Include ${options.moderator.name} as a ${options.moderator.style} moderator to guide the discussion${ 68 | options.moderator.openingRemarks ? ', starting with opening remarks' : '' 69 | }${ 70 | options.moderator.closingRemarks ? ' and ending with closing remarks' : '' 71 | }.` : 72 | 'Allow the conversation to flow naturally between speakers. This is a discussion with no moderation, and speakers naturally interrupt each other.'} 73 | 74 | The script should be based on the following input sources (summarized below): 75 | ====== BEGIN SUMMARY ====== 76 | ${summary} 77 | ====== END SUMMARY ====== 78 | 79 | These are some conversational hooks that you can use for inspiration to develop the script: 80 | ====== BEGIN HOOKS ====== 81 | ${hooks.join("\n")} 82 | ====== END HOOKS ====== 83 | `; 84 | 85 | const scriptResponse = await ai.generate({ 86 | model: gemini15Flash, 87 | prompt, 88 | config: { temperature: 0.8 }, 89 | output: { schema: finalPodcastScriptOutputSchema }, 90 | }); 91 | 92 | const script = scriptResponse.output?.script || []; 93 | return { script }; 94 | } 95 | ); -------------------------------------------------------------------------------- /synthesis/src/flows/generateHooks.ts: -------------------------------------------------------------------------------- 1 | import { gemini15Flash } from "@genkit-ai/googleai"; 2 | import { z } from "genkit"; 3 | import { ai } from "../config"; 4 | 5 | const discussionHooksInputSchema = z.object({ 6 | summary: z.string() 7 | }); 8 | 9 | const discussionHooksOutputSchema = z.object({ 10 | hooks: z.array(z.string()), 11 | }); 12 | 13 | export const discussionHooksFlow = ai.defineFlow( 14 | { 15 | name: "discussionHooksFlow", 16 | inputSchema: discussionHooksInputSchema, 17 | outputSchema: discussionHooksOutputSchema, 18 | }, 19 | async (input: z.infer<typeof discussionHooksInputSchema>) => { 20 | const { summary } = input; 21 | 22 | const prompt = ` 23 | Given the following summaries: 24 | ${summary} 25 | 26 | Suggest 5-7 angles or hooks for a podcast conversation. 27 | Each one should be a short bullet introducing a question or point. 
28 | `; 29 | 30 | const hookResponse = await ai.generate({ 31 | model: gemini15Flash, 32 | prompt, 33 | config: { temperature: 0.7 }, 34 | output: { schema: discussionHooksOutputSchema }, 35 | }); 36 | 37 | const hooks = hookResponse.output; 38 | return hooks || {hooks: []}; 39 | } 40 | ); -------------------------------------------------------------------------------- /synthesis/src/flows/generateScript.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai, storage } from "../config"; 3 | import { uploadFileToStorage } from "../util"; 4 | import { roundtablePodcastScriptFlow } from "./formats/roundtable"; 5 | import { debatePodcastScriptFlow } from "./formats/debate"; 6 | import { interviewPodcastScriptFlow } from "./formats/interview"; 7 | import fs from "fs/promises"; 8 | import { podcastOptionsSchema } from "../schemas/podcast"; 9 | 10 | export const generateScriptFlow = ai.defineFlow( 11 | { 12 | name: "generateScriptFlow", 13 | inputSchema: z.object({ 14 | summary: z.string(), 15 | hooks: z.array(z.string()), 16 | options: podcastOptionsSchema 17 | }), 18 | outputSchema: z.object({ 19 | script: z.array(z.object({ 20 | speaker: z.string(), 21 | text: z.string() 22 | })), 23 | storageUrl: z.string().optional() 24 | }) 25 | }, 26 | async (input) => { 27 | let scriptResult; 28 | switch (input.options.format) { 29 | case "roundtable": 30 | scriptResult = await roundtablePodcastScriptFlow({ 31 | summary: input.summary, 32 | options: input.options, 33 | hooks: input.hooks, 34 | }); 35 | break; 36 | case "debate": 37 | scriptResult = await debatePodcastScriptFlow({ 38 | summary: input.summary, 39 | options: input.options, 40 | hooks: input.hooks, 41 | }); 42 | break; 43 | case "interview": 44 | scriptResult = await interviewPodcastScriptFlow({ 45 | summary: input.summary, 46 | options: input.options, 47 | hooks: input.hooks, 48 | }); 49 | break; 50 | default: 51 | throw new Error(`Unsupported podcast format`); 52 | } 53 | 54 | if (!scriptResult.script) { 55 | throw new Error("Script generation failed - no script content returned"); 56 | } 57 | 58 | let storageUrl; 59 | if (storage) { 60 | // Upload transcript to storage 61 | const transcriptFileName = `transcript_${Date.now()}.json`; 62 | const storagePath = `${input.options.transcriptStorage}/${transcriptFileName}`; 63 | const bucket = storage.bucket(input.options.bucketName); 64 | const transcriptContent = JSON.stringify(scriptResult.script, null, 2); 65 | await fs.writeFile(transcriptFileName, transcriptContent); 66 | 67 | storageUrl = await uploadFileToStorage(bucket, transcriptFileName, storagePath); 68 | 69 | // Cleanup temp file 70 | await fs.unlink(transcriptFileName).catch(err => 71 | console.warn("Could not remove transcript file:", transcriptFileName, err) 72 | ); 73 | } 74 | 75 | return { 76 | script: scriptResult.script, 77 | storageUrl 78 | }; 79 | } 80 | ); -------------------------------------------------------------------------------- /synthesis/src/flows/index.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai, db, JOBS_COLLECTION, USE_CLOUD_STORAGE, USE_FIRESTORE } from "../config"; 3 | import { podcastOptionsSchema } from "../schemas/podcast"; 4 | import { summarizeSourcesFlow } from "./summarizeSource"; 5 | import { synthesizeAudioFlow } from "./synthesizeAudio"; 6 | import { discussionHooksFlow } from "./generateHooks"; 7 | import { generateScriptFlow } from 
"./generateScript"; 8 | 9 | export enum JobStatus { 10 | QUEUED = "QUEUED", 11 | PROCESSING = "PROCESSING", 12 | COMPLETED = "COMPLETED", 13 | ERROR = "ERROR", 14 | } 15 | 16 | 17 | const endToEndPodcastInputSchema = z.object({ 18 | sourceTexts: z.array(z.string()), 19 | jobId: z.string(), 20 | options: podcastOptionsSchema, 21 | }); 22 | 23 | const endToEndPodcastOutputSchema = z.object({ 24 | audioFileName: z.string(), 25 | script: z.array( 26 | z.object({ 27 | speaker: z.string(), 28 | text: z.string(), 29 | }) 30 | ), 31 | storageUrl: z.string(), 32 | }); 33 | 34 | export const endToEndPodcastFlow = ai.defineFlow( 35 | { 36 | name: "endToEndPodcastFlow", 37 | inputSchema: endToEndPodcastInputSchema, 38 | outputSchema: endToEndPodcastOutputSchema, 39 | }, 40 | async (input: z.infer) => { 41 | let timer = Date.now(); 42 | const metrics: Record = {}; 43 | const logJobStatus = (status: JobStatus, data: any = {}) => { 44 | if (db) { 45 | const jobRef = db.collection(JOBS_COLLECTION).doc(input.jobId); 46 | return jobRef.set(data, { merge: true }); 47 | } else { 48 | console.log(`Job ${input.jobId} status update:`, { status, ...data }); 49 | return Promise.resolve(); 50 | } 51 | }; 52 | 53 | await logJobStatus(JobStatus.QUEUED, { 54 | status: JobStatus.QUEUED, 55 | jobId: input.jobId, 56 | createdAt: Date.now() 57 | }); 58 | 59 | try { 60 | await logJobStatus(JobStatus.PROCESSING, { 61 | status: JobStatus.PROCESSING, 62 | currentStep: 'Generating summary', 63 | startTime: Date.now() 64 | }); 65 | 66 | const summaryResult = await summarizeSourcesFlow({ 67 | sourceTexts: input.sourceTexts, 68 | }); 69 | metrics.summarize = Date.now() - timer; 70 | timer = Date.now(); 71 | 72 | await logJobStatus(JobStatus.PROCESSING, { 73 | currentStep: 'Generating discussion hooks' 74 | }); 75 | 76 | const hooksResult = await discussionHooksFlow({ 77 | summary: summaryResult.combinedSummary, 78 | }); 79 | metrics.hooks = Date.now() - timer; 80 | timer = Date.now(); 81 | 82 | await logJobStatus(JobStatus.PROCESSING, { 83 | currentStep: 'Generating script' 84 | }); 85 | 86 | const scriptResult = await generateScriptFlow({ 87 | summary: summaryResult.combinedSummary, 88 | hooks: hooksResult.hooks, 89 | options: input.options, 90 | }); 91 | metrics.script = Date.now() - timer; 92 | timer = Date.now(); 93 | 94 | await logJobStatus(JobStatus.PROCESSING, { 95 | currentStep: 'Synthesizing audio' 96 | }); 97 | 98 | const audioResult = await synthesizeAudioFlow({ 99 | script: scriptResult.script, 100 | speakers: input.options.speakers, 101 | moderator: 'moderator' in input.options ? input.options.moderator : undefined, 102 | options: input.options, 103 | }); 104 | metrics.audio = Date.now() - timer; 105 | 106 | await logJobStatus(JobStatus.COMPLETED, { 107 | status: JobStatus.COMPLETED, 108 | currentStep: '', 109 | metrics, 110 | completedAt: Date.now(), 111 | summary: summaryResult.combinedSummary, 112 | hooks: hooksResult.hooks, 113 | script: scriptResult.script, 114 | audioUrl: audioResult.storageUrl 115 | }); 116 | 117 | return { 118 | audioFileName: audioResult.audioFileName, 119 | script: scriptResult.script, 120 | storageUrl: audioResult.storageUrl 121 | }; 122 | } catch (error) { 123 | await logJobStatus(JobStatus.ERROR, { 124 | status: JobStatus.ERROR, 125 | error: error instanceof Error ? 
error.message : String(error), 126 | failedAt: Date.now() 127 | }); 128 | throw error; 129 | } 130 | } 131 | ); 132 | -------------------------------------------------------------------------------- /synthesis/src/flows/summarizeSource.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai } from "../config"; 3 | import { gemini15Flash } from "@genkit-ai/googleai"; 4 | 5 | // Flow #1: Summarize Source 6 | const summarizeSourceInputSchema = z.object({ 7 | sourceText: z.string(), 8 | }); 9 | 10 | const summarizeSourceOutputSchema = z.object({ 11 | summary: z.string(), 12 | quotesBlock: z.string(), 13 | outlineBlock: z.string(), 14 | }); 15 | 16 | export const summarizeSourceFlow = ai.defineFlow( 17 | { 18 | name: "summarizeSourceFlow", 19 | inputSchema: summarizeSourceInputSchema, 20 | outputSchema: summarizeSourceOutputSchema, 21 | }, 22 | async (inputValues: z.infer<typeof summarizeSourceInputSchema>) => { 23 | const { sourceText } = inputValues; 24 | 25 | const prompt = ` 26 | You have a piece of text. 27 | 1) Summarize it (2-3 paragraphs). 28 | 2) Provide a short list of direct quotes or excerpts. 29 | 3) Give a bullet-list outline of the key points. 30 | 31 | Source: 32 | ${sourceText} 33 | `; 34 | 35 | const summaryResponse = await ai.generate({ 36 | model: gemini15Flash, 37 | prompt, 38 | config: { temperature: 0.8 }, 39 | output: { schema: summarizeSourceOutputSchema }, 40 | }); 41 | 42 | const summary = summaryResponse.output?.summary || ""; 43 | const quotesBlock = summaryResponse.output?.quotesBlock || ""; 44 | const outlineBlock = summaryResponse.output?.outlineBlock || ""; 45 | 46 | return { summary, quotesBlock, outlineBlock }; 47 | } 48 | ); 49 | 50 | export const summarizeSourcesFlow = ai.defineFlow( 51 | { 52 | name: "summarizeSourcesFlow", 53 | inputSchema: z.object({ 54 | sourceTexts: z.array(z.string()) 55 | }), 56 | outputSchema: z.object({ 57 | combinedSummary: z.string() 58 | }) 59 | }, 60 | async (input) => { 61 | const { sourceTexts } = input; 62 | 63 | // Summarize each source independently 64 | const summaryResults = await Promise.all( 65 | sourceTexts.map((sourceText: string) => 66 | summarizeSourceFlow({ sourceText }) 67 | ) 68 | ); 69 | 70 | // Combine the summaries 71 | const combinedSummary = "------ BEGIN INPUT SOURCE SUMMARIES ------\n" + 72 | summaryResults.map((result: { summary: string, quotesBlock: string }, index: number) => 73 | `SOURCE #${index + 1}:\nSummary: ${result.summary}\nQuotes: ${result.quotesBlock}` 74 | ).join("\n------------\n") + 75 | "\n------ END INPUT SOURCE SUMMARIES -----"; 76 | 77 | return { 78 | combinedSummary 79 | }; 80 | } 81 | ); -------------------------------------------------------------------------------- /synthesis/src/flows/synthesizeAudio.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { ai, storage, tts } from "../config"; 3 | import { podcastOptionsSchema } from "../schemas/podcast"; 4 | import fs from "fs/promises"; 5 | import ffmpeg from "fluent-ffmpeg"; 6 | import path from "path"; 7 | import { v4 as uuidv4 } from "uuid"; 8 | import { uploadFileToStorage } from "../util"; 9 | import { moderatorSchema, speakerSchema } from "../schemas/base"; 10 | import { USE_CLOUD_STORAGE } from "../config"; 11 | const defaultVoiceId = "en-US-Journey-D"; // fallback voice 12 | 13 | type PodcastScriptLine = { 14 | speaker: string; 15 | text: string; 16 | } 17 | 18 | enum AudioEncoding { 19 | MP3 = "MP3", 20 | 
AUDIO_ENCODING_UNSPECIFIED = "AUDIO_ENCODING_UNSPECIFIED", 21 | LINEAR16 = "LINEAR16", 22 | OGG_OPUS = "OGG_OPUS", 23 | MULAW = "MULAW", 24 | ALAW = "ALAW", 25 | PCM = "PCM", 26 | } 27 | 28 | const synthesizeAudioInputSchema = z.object({ 29 | script: z.array( 30 | z.object({ 31 | speaker: z.string(), 32 | text: z.string() 33 | }) 34 | ), 35 | moderator: moderatorSchema.optional(), 36 | speakers: z.array(speakerSchema), 37 | options: podcastOptionsSchema 38 | }); 39 | 40 | const synthesizeAudioOutputSchema = z.object({ 41 | audioFileName: z.string(), 42 | storageUrl: z.string(), 43 | }); 44 | 45 | function getVoiceIdForSpeaker(speakerName: string, speakers: z.infer<typeof speakerSchema>[], moderator?: z.infer<typeof moderatorSchema>): string { 46 | // First check if this is the moderator 47 | if (moderator && moderator.name === speakerName) { 48 | return moderator.voiceId || defaultVoiceId; 49 | } 50 | // Then check regular speakers 51 | const speaker = speakers.find(s => s.name === speakerName); 52 | return speaker?.voiceId || defaultVoiceId; 53 | } 54 | 55 | export const synthesizeAudioFlow = ai.defineFlow( 56 | { 57 | name: "synthesizeAudioFlow", 58 | inputSchema: synthesizeAudioInputSchema, 59 | outputSchema: synthesizeAudioOutputSchema, 60 | }, 61 | async (inputValues: z.infer<typeof synthesizeAudioInputSchema>) => { 62 | const { script, speakers, moderator, options } = inputValues; 63 | 64 | const outputFileName = `podcast_audio_${options.title || uuidv4()}.mp3`; 65 | const storagePath = `${options.audioStorage}/${outputFileName}`; 66 | const bucketName = options.bucketName; 67 | let storageUrl = ""; 68 | if (bucketName && USE_CLOUD_STORAGE) { 69 | const bucket = storage?.bucket(bucketName); 70 | storageUrl = await synthesizePodcastAudio(script, bucket, bucketName, outputFileName, storagePath, speakers, moderator); 71 | } else { 72 | storageUrl = ""; 73 | } 74 | return { audioFileName: outputFileName, storageUrl }; 75 | } 76 | ); 77 | 78 | /** 79 | * This function loops through each line of the final podcast script, 80 | * synthesizes the audio for each line, and writes out separate mp3 segments. 81 | * Finally, it merges all segments into a single output mp3 file. 82 | */ 83 | export async function synthesizePodcastAudio( 84 | script: PodcastScriptLine[], 85 | bucket: any | null, 86 | bucketName: string, 87 | outputFileName: string, 88 | storagePath: string, 89 | speakers: z.infer<typeof speakerSchema>[] = [], 90 | moderator?: z.infer<typeof moderatorSchema>, 91 | ) { 92 | console.log('Starting audio synthesis for', script.length, 'sections'); 93 | const concurrency = 3; 94 | const startTime = Date.now(); 95 | const synthesisMetrics = { 96 | totalSegments: 0, 97 | avgSegmentTime: 0, 98 | mergeTime: 0, 99 | uploadTime: 0, 100 | retries: 0, 101 | }; 102 | 103 | // Add line count metrics 104 | const totalLines = script.length; 105 | synthesisMetrics.totalSegments = totalLines; 106 | console.log(`Total lines to synthesize: ${totalLines}`); 107 | 108 | // Add retry helper function 109 | const withRetry = async <T>(fn: () => Promise<T>, retries = 3, delayMs = 1000): Promise<T> => { 110 | try { 111 | return await fn(); 112 | } catch (error) { 113 | if (retries > 0) { 114 | console.log(`Retrying... 
attempts left: ${retries}`); 115 | await new Promise(resolve => setTimeout(resolve, delayMs)); 116 | return withRetry(fn, retries - 1, delayMs * 2); // Exponential backoff 117 | } 118 | throw error; 119 | } 120 | }; 121 | 122 | const processSegment = async ({ line, segmentIndex }: { line: PodcastScriptLine, segmentIndex: number }) => { 123 | const segmentStart = Date.now(); 124 | const segmentFileName = `segment_${segmentIndex}_${line.speaker}.mp3`; 125 | 126 | await withRetry(async () => { 127 | console.log(`Synthesizing audio for segment ${segmentIndex}`); 128 | // Get voice name and infer language code from speaker 129 | const name = getVoiceIdForSpeaker(line.speaker, speakers, moderator); 130 | const languageCode = name.split('-')[0] + '-' + name.split('-')[1]; 131 | const [response] = await tts.synthesizeSpeech({ 132 | input: { text: line.text }, 133 | voice: {languageCode, name}, 134 | audioConfig: { 135 | audioEncoding: AudioEncoding.MP3, 136 | effectsProfileId: ["small-bluetooth-speaker-class-device"], 137 | pitch: 0, 138 | speakingRate: 1, 139 | }, 140 | }); 141 | 142 | if (!response.audioContent) { 143 | throw new Error("No audio content received"); 144 | } 145 | 146 | await fs.writeFile(segmentFileName, response.audioContent, "binary"); 147 | }); 148 | 149 | const segmentTime = Date.now() - segmentStart; 150 | synthesisMetrics.avgSegmentTime = (synthesisMetrics.avgSegmentTime * (segmentIndex) + segmentTime) / (segmentIndex + 1); 151 | return segmentFileName; 152 | }; 153 | 154 | const segmentFiles = []; 155 | const allSegments = script.map((line, index) => ({ 156 | line, 157 | segmentIndex: index 158 | })); 159 | 160 | async function processBatch( 161 | segments: { line: PodcastScriptLine, segmentIndex: number }[], 162 | processSegment: (segment: { line: PodcastScriptLine, segmentIndex: number }) => Promise<string> 163 | ) { 164 | return Promise.all(segments.map(processSegment)); 165 | } 166 | 167 | for (let i = 0; i < allSegments.length; i += concurrency) { 168 | const batch = allSegments.slice(i, i + concurrency); 169 | const batchResults = await processBatch(batch, processSegment); 170 | segmentFiles.push(...batchResults); 171 | 172 | // Add delay between batches if there are more segments to process 173 | const hasMoreSegments = i + concurrency < allSegments.length; 174 | if (hasMoreSegments) { 175 | await new Promise(resolve => setTimeout(resolve, 1000)); 176 | } 177 | } 178 | 179 | console.log('Merging', segmentFiles.length, 'audio segments...'); 180 | const mergeStart = Date.now(); 181 | await mergeAudioFiles(segmentFiles, outputFileName); 182 | synthesisMetrics.mergeTime = Date.now() - mergeStart; 183 | 184 | console.log('Uploading merged file to storage...'); 185 | const uploadStart = Date.now(); 186 | let finalOutputPath; 187 | if (USE_CLOUD_STORAGE && bucket) { 188 | const finalOutputFileName = outputFileName; 189 | await uploadFileToStorage(bucket, finalOutputFileName, storagePath); 190 | synthesisMetrics.uploadTime = Date.now() - uploadStart; 191 | console.log('Generating shareable download URL...'); 192 | finalOutputPath = `gs://${bucketName}/${storagePath}`; 193 | console.log('Generated Google Storage URL:', finalOutputPath); 194 | 195 | // Clean up local files after upload 196 | console.log('Cleaning up temporary files...'); 197 | await Promise.all( 198 | segmentFiles.map(file => 199 | fs.unlink(file).catch(err => 200 | console.warn("Could not remove temp file:", file, err) 201 | ) 202 | ) 203 | ); 204 | await fs.unlink(outputFileName).catch(err => 
console.warn("Could not remove merged file:", outputFileName, err) 206 | ); 207 | } else { 208 | finalOutputPath = outputFileName; 209 | // Only clean up segment files when keeping final output 210 | console.log('Cleaning up temporary segment files...'); 211 | await Promise.all( 212 | segmentFiles.map(file => 213 | fs.unlink(file).catch(err => 214 | console.warn("Could not remove temp file:", file, err) 215 | ) 216 | ) 217 | ); 218 | } 219 | console.log('Cleanup complete'); 220 | 221 | // Add audio synthesis metrics 222 | console.log('\n=== Audio Synthesis Metrics ==='); 223 | console.log(`Total segments processed: ${synthesisMetrics.totalSegments}`); 224 | console.log(`Average segment time: ${synthesisMetrics.avgSegmentTime.toFixed(1)}ms`); 225 | console.log(`Merge time: ${synthesisMetrics.mergeTime}ms`); 226 | console.log(`Upload time: ${synthesisMetrics.uploadTime}ms`); 227 | console.log(`Retry attempts: ${synthesisMetrics.retries}`); 228 | console.log(`Total audio processing time: ${Date.now() - startTime}ms\n`); 229 | 230 | return finalOutputPath; 231 | } 232 | 233 | // Helper function to promisify ffmpeg merge operation 234 | async function mergeAudioFiles(segmentFiles: string[], outputFileName: string): Promise { 235 | return new Promise((resolve, reject) => { 236 | let command = ffmpeg(); 237 | 238 | segmentFiles.forEach(file => command.input(file)); 239 | 240 | command 241 | .on('error', (err) => reject(err)) 242 | .on('end', () => resolve()) 243 | .mergeToFile(path.join(process.cwd(), outputFileName), path.join(process.cwd(), 'tmp')); 244 | }); 245 | } -------------------------------------------------------------------------------- /synthesis/src/middleware/errorHandler.ts: -------------------------------------------------------------------------------- 1 | import { Request, Response, NextFunction } from 'express'; 2 | 3 | export const errorHandler = ( 4 | err: Error, 5 | req: Request, 6 | res: Response, 7 | next: NextFunction 8 | ) => { 9 | console.error(err.stack); 10 | res.status(500).json({ 11 | error: { 12 | message: err.message, 13 | ...(process.env.NODE_ENV === 'development' ? 
{ stack: err.stack } : {}) 14 | } 15 | }); 16 | }; -------------------------------------------------------------------------------- /synthesis/src/routes/synthesis.ts: -------------------------------------------------------------------------------- 1 | import { Router } from 'express'; 2 | import { synthesisController } from '../controllers/synthesisController'; 3 | 4 | const router = Router(); 5 | 6 | router.post('/', synthesisController.synthesize); 7 | 8 | export { router as synthesisRouter }; -------------------------------------------------------------------------------- /synthesis/src/schemas/base.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | 3 | export const speakerSchema = z.object({ 4 | name: z.string(), 5 | voiceId: z.string().optional(), 6 | background: z.string().optional(), 7 | }); 8 | 9 | export const basePodcastOptionsSchema = z.object({ 10 | speakers: z.array(speakerSchema), 11 | transcriptStorage: z.string().optional(), 12 | audioStorage: z.string().optional(), 13 | title: z.string().optional(), 14 | bucketName: z.string().optional(), 15 | }); 16 | 17 | export const moderatorSchema = z.object({ 18 | name: z.string(), 19 | voiceId: z.string().optional(), 20 | style: z.string(), 21 | gender: z.enum(["male", "female"]).optional(), 22 | speakingTime: z.number().min(1).max(10).optional(), 23 | openingRemarks: z.boolean().optional(), 24 | closingRemarks: z.boolean().optional() 25 | }); 26 | 27 | -------------------------------------------------------------------------------- /synthesis/src/schemas/formats/debate.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { basePodcastOptionsSchema, moderatorSchema } from "../base"; 3 | 4 | export const debateSideSchema = z.object({ 5 | sideName: z.string(), 6 | speakers: z.array(z.string()), 7 | description: z.string().optional(), 8 | keyPoints: z.array(z.string()).optional() 9 | }); 10 | 11 | export const debatePodcastOptionsSchema = basePodcastOptionsSchema.extend({ 12 | format: z.literal("debate"), 13 | debateTopic: z.string().optional(), 14 | debateStructure: z.enum(["formal", "open"]).optional(), 15 | numRounds: z.number().min(1).max(10).optional(), 16 | moderator: moderatorSchema.optional(), 17 | autoAssignSides: z.boolean().optional(), 18 | sides: z.array(debateSideSchema).optional() 19 | }); -------------------------------------------------------------------------------- /synthesis/src/schemas/formats/interview.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { basePodcastOptionsSchema } from "../base"; 3 | 4 | export const interviewPodcastOptionsSchema = basePodcastOptionsSchema.extend({ 5 | format: z.literal("interview"), 6 | 7 | // The guest being interviewed. AI will frame the interview around this speaker. 8 | // If omitted, AI picks the most relevant guest from `speakers[]`. 9 | intervieweeName: z.string().optional(), 10 | 11 | // The topic or guiding question for the interview. 12 | // If omitted, AI infers a topic based on input content. 13 | topic: z.string().optional(), 14 | // If true, multiple interviewers take turns asking questions. 15 | // If false, the first "host" in `speakers[]` is the only interviewer. 16 | rotatingInterviewers: z.boolean().optional(), 17 | 18 | // Max number of questions in the interview. 19 | // Defaults to 10 if unspecified. 
20 | // Min: 3, Max: 20 21 | maxQuestions: z.number().optional() 22 | }); 23 | -------------------------------------------------------------------------------- /synthesis/src/schemas/formats/roundtable.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { basePodcastOptionsSchema, moderatorSchema } from "../base"; 3 | 4 | export const roundtablePodcastOptionsSchema = basePodcastOptionsSchema.extend({ 5 | format: z.literal("roundtable"), 6 | discussionStyle: z.enum([ 7 | "expert_panel", // In-depth discussion with domain experts 8 | "founders_chat", // Candid discussions between startup founders 9 | "trend_analysis", // Discussion focused on analyzing current trends 10 | "industry_roundtable", // Professionals discussing an industry challenge 11 | "brainstorm_session", // Free-flowing discussion of ideas & problem-solving 12 | ]), 13 | structure: z.enum([ 14 | "open_discussion", // No strict topic control, speakers talk naturally 15 | "moderated_topics", // AI-guided transitions between structured topics 16 | ]), 17 | moderator: moderatorSchema.optional() // Optional moderator to guide discussion and ensure balanced speaking time 18 | }); 19 | -------------------------------------------------------------------------------- /synthesis/src/schemas/podcast.ts: -------------------------------------------------------------------------------- 1 | import { z } from "genkit"; 2 | import { interviewPodcastOptionsSchema } from "./formats/interview"; 3 | import { roundtablePodcastOptionsSchema } from "./formats/roundtable"; 4 | import { debatePodcastOptionsSchema } from "./formats/debate"; 5 | 6 | // Defines a discriminated union of podcast format options, using the "format" field as the discriminator. 7 | // This allows type-safe handling of the different podcast formats (interview, roundtable, debate) 8 | // while ensuring the correct options schema is used for each format. 9 | export const podcastOptionsSchema = z.discriminatedUnion("format", [ 10 | interviewPodcastOptionsSchema, 11 | roundtablePodcastOptionsSchema, 12 | debatePodcastOptionsSchema 13 | ]); 14 | 15 | export type InterviewPodcastOptions = z.infer<typeof interviewPodcastOptionsSchema>; 16 | export type RoundtablePodcastOptions = z.infer<typeof roundtablePodcastOptionsSchema>; 17 | export type DebatePodcastOptions = z.infer<typeof debatePodcastOptionsSchema>; 18 | export type PodcastOptions = InterviewPodcastOptions | RoundtablePodcastOptions | DebatePodcastOptions; 19 | 20 | export type OutputType = 21 | // | "summary" 22 | | "podcast"; 23 | 24 | // Define the valid output configurations 25 | export type OutputConfig = 26 | // | { type: "summary"; options: SummaryOptions } 27 | | { type: "podcast"; options: PodcastOptions } 28 | 29 | 30 | /** 31 | * Main Synthesis Request 32 | */ 33 | export interface SynthesisRequest { 34 | /** The input source(s) for synthesis */ 35 | input: string | string[]; // Supports multiple sources (PDFs, URLs, etc.) 
36 |   /** The desired output formats */
37 |   output: OutputConfig[]; // Supports multiple output types in a single request
38 | }
39 | 
40 | export interface SynthesisResult {
41 |   //studyGuide?: StudyGuideSection[];
42 |   podcast?: PodcastResult;
43 | }
44 | 
45 | // TODO: Need to figure out how to handle remote storage of the actual files generated
46 | export interface PodcastResult {
47 |   transcript: string;
48 |   storageUrl: string;
49 |   audioFilename: string;
50 | }
51 | 
--------------------------------------------------------------------------------
/synthesis/src/server.ts:
--------------------------------------------------------------------------------
 1 | import express from 'express';
 2 | import cors from 'cors';
 3 | import helmet from 'helmet';
 4 | import dotenv from 'dotenv';
 5 | import { synthesisRouter } from './routes/synthesis';
 6 | import { errorHandler } from './middleware/errorHandler';
 7 | 
 8 | dotenv.config();
 9 | 
10 | const app = express();
11 | const port = process.env.PORT || 8080;
12 | 
13 | // Middleware
14 | app.use(helmet());
15 | app.use(cors());
16 | app.use(express.json({ limit: '50mb' }));
17 | 
18 | // Routes
19 | app.use('/api/synthesis', synthesisRouter);
20 | 
21 | // Error handling
22 | app.use(errorHandler);
23 | 
24 | app.listen(port, () => {
25 |   console.log(`Server is running on port ${port}`);
26 | });
--------------------------------------------------------------------------------
/synthesis/src/synthesis.ts:
--------------------------------------------------------------------------------
 1 | //import { generateSummary } from './flows/summaryOLD';
 2 | import { endToEndPodcastFlow } from './flows';
 3 | import { v4 as uuidv4 } from 'uuid';
 4 | import { SynthesisRequest, SynthesisResult, PodcastOptions, PodcastResult } from './schemas/podcast';
 5 | import { getTextFromUrl, isUrl } from './util';
 6 | 
 7 | 
 8 | export async function synthesize(request: SynthesisRequest): Promise<SynthesisResult> {
 9 |   const results: SynthesisResult = {};
10 | 
11 |   for (const output of request.output) {
12 |     switch (output.type) {
13 |       // in the future, support additional types
14 |       case 'podcast':
15 |         results.podcast = await generatePodcast(request.input, output.options);
16 |         break;
17 |     }
18 |   }
19 | 
20 |   return results;
21 | }
22 | 
23 | async function generatePodcast(input: string | string[], options: PodcastOptions): Promise<PodcastResult> {
24 |   // Generate a unique job ID for tracking this podcast generation
25 |   const jobId = `podcast_${uuidv4()}`;
26 | 
27 |   // Convert input to array if single string
28 |   const sourceTexts = Array.isArray(input) ? input : [input];
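  // Each entry may be raw text or a URL (gs:// or https). The loop below
  // swaps URL entries for their extracted text in place; e.g., with a
  // hypothetical path,
  //
  //   generatePodcast(["gs://your-bucket/paper.pdf", "raw notes..."], options)
  //
  // would replace the PDF entry with its parsed text and pass "raw notes..."
  // through unchanged.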
29 | 
30 |   for (let i = 0; i < sourceTexts.length; i++) {
31 |     if (isUrl(sourceTexts[i])) {
32 |       const { isText, content } = await getTextFromUrl(sourceTexts[i]);
33 |       if (isText) {
34 |         sourceTexts[i] = content;
35 |       } else {
36 |         throw new Error(`Failed to retrieve text content from URL: ${sourceTexts[i]}`);
37 |       }
38 |     }
39 |   }
40 |   // Call the end-to-end podcast generation flow
41 |   const result = await endToEndPodcastFlow({
42 |     sourceTexts,
43 |     jobId,
44 |     options
45 |   });
46 | 
47 |   return {
48 |     transcript: JSON.stringify(result.script),
49 |     audioFilename: result.audioFileName,
50 |     storageUrl: result.storageUrl
51 |   };
52 | }
53 | 
--------------------------------------------------------------------------------
/synthesis/src/util.ts:
--------------------------------------------------------------------------------
 1 | import axios from "axios";
 2 | import pdfParse from "pdf-parse";
 3 | import path from "path";
 4 | import mammoth from "mammoth"; // For .docx
 5 | import { storage } from './config';
 6 | 
 7 | export async function uploadFileToStorage(bucket: any, filePath: string, destination: string) {
 8 |   await bucket.upload(filePath, {
 9 |     destination,
10 |     metadata: {
11 |       cacheControl: 'public, max-age=31536000',
12 |     },
13 |   });
14 |   console.log(`${filePath} uploaded to ${destination}`);
15 |   return `gs://${bucket.name}/${destination}`;
16 | }
17 | 
18 | /**
19 |  * This function retrieves text content from a given URL. It supports both Google Cloud Storage URLs (gs://) and regular URLs.
20 |  *
21 |  * 1. If the URL starts with "gs://", it handles it as a Google Cloud Storage URL:
22 |  *    - Extracts the bucket name and file path from the URL.
23 |  *    - Downloads the file from the specified Google Cloud Storage bucket.
24 |  *
25 |  * 2. If the URL is a regular URL:
26 |  *    - Fetches the file from the URL.
27 |  *    - Converts the response to a buffer.
28 |  *
29 |  * 3. Determines the content type and file extension to decide how to process the file:
30 |  *    - For PDF files (content type includes "pdf" or extension is ".pdf"):
31 |  *      - Uses pdf-parse to extract text content from the PDF buffer.
32 |  *    - For plain text files (content type includes "plain" or extension is ".txt"):
33 |  *      - Converts the buffer to a UTF-8 string.
34 |  *    - For Word documents (content type includes "word" or extension is ".docx"):
35 |  *      - Uses mammoth to extract raw text from the Word document buffer.
36 |  *    - For Markdown files (extension is ".md"):
37 |  *      - Converts the buffer to a UTF-8 string.
38 |  *
39 |  * 4. Returns an object containing:
40 |  *    - isText: A boolean indicating whether the file was successfully processed as text.
41 |  *    - content: The extracted text content.
42 |  *
43 |  * If any errors occur during the process, they are logged and re-thrown.
44 |  */
45 | export async function getTextFromUrl(url: string): Promise<{ isText: boolean, content: string }> {
46 |   try {
47 |     let buffer: Buffer;
48 |     let contentType = "";
49 | 
50 |     if (url.startsWith("gs://")) {
51 |       // Handle Google Cloud Storage URL
52 |       if (!storage) {
53 |         throw new Error("Storage is not defined. 
Cannot access Google Cloud Storage."); 54 | } 55 | const bucketName = url.split('/')[2]; 56 | const filePath = url.split('/').slice(3).join('/'); 57 | const file = storage.bucket(bucketName).file(filePath); 58 | const [fileBuffer] = await file.download(); 59 | const [metadata] = await file.getMetadata(); 60 | contentType = metadata.contentType || ""; 61 | buffer = fileBuffer; 62 | } else { 63 | // Handle regular URL 64 | const response = await axios.get(url, { 65 | responseType: 'arraybuffer', 66 | headers: { 67 | 'Accept': '*/*' 68 | } 69 | }); 70 | contentType = response.headers['content-type'] || ""; 71 | buffer = Buffer.from(response.data); 72 | } 73 | 74 | const extension = path.extname(url).toLowerCase(); 75 | 76 | let text = ""; 77 | let isText = false; 78 | 79 | if (contentType.includes("pdf") || extension === ".pdf") { 80 | const data = await pdfParse(buffer); 81 | text = data.text; 82 | isText = true; 83 | } else if (contentType.includes("plain") || extension === ".txt") { 84 | text = buffer.toString("utf-8"); 85 | isText = true; 86 | } else if (contentType.includes("word") || extension === ".docx") { 87 | const result = await mammoth.extractRawText({ buffer }); 88 | text = result.value; 89 | isText = true; 90 | } else if (extension === ".md") { 91 | text = buffer.toString("utf-8"); 92 | isText = true; 93 | } 94 | 95 | return { isText, content: text }; 96 | } catch (error) { 97 | console.error("Error processing file:", error); 98 | throw error; 99 | } 100 | } 101 | 102 | export function isUrl(text: string): boolean { 103 | const trimmed = text.trim(); 104 | // Check for spaces 105 | if (trimmed.includes(' ')) { 106 | return false; 107 | } 108 | 109 | // Check for Google Cloud Storage URLs 110 | if (trimmed.startsWith('gs://')) { 111 | // Basic GCS URL format validation: gs://bucket-name/path 112 | const parts = trimmed.split('/'); 113 | return parts.length >= 3 && parts[2].length > 0; 114 | } 115 | 116 | // Check for standard URLs 117 | try { 118 | new URL(trimmed); 119 | return true; 120 | } catch (_) { 121 | return false; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /synthesis/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2020", 4 | "module": "commonjs", 5 | "declaration": true, 6 | "outDir": "./dist", 7 | "rootDir": "./src", 8 | "strict": true, 9 | "esModuleInterop": true, 10 | "skipLibCheck": true, 11 | "forceConsistentCasingInFileNames": true, 12 | "composite": true 13 | }, 14 | "include": [ 15 | "src/**/*" 16 | ], 17 | "exclude": [ 18 | "node_modules", 19 | "dist" 20 | ] 21 | } -------------------------------------------------------------------------------- /synthesis_podcast_audio_expert-interview.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genkit-ai/genkit-notebooklm/b0514f04e5cf43e03e900104ef4e456c4c085961/synthesis_podcast_audio_expert-interview.mp3 -------------------------------------------------------------------------------- /webapp/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /webapp/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "new-york", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.ts", 8 | "css": "src/app/globals.css", 9 | "baseColor": "neutral", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils", 16 | "ui": "@/components/ui", 17 | "lib": "@/lib", 18 | "hooks": "@/hooks" 19 | }, 20 | "iconLibrary": "lucide" 21 | } -------------------------------------------------------------------------------- /webapp/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | }; 6 | 7 | export default nextConfig; 8 | -------------------------------------------------------------------------------- /webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webapp", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@radix-ui/react-collapsible": "^1.1.2", 13 | "@radix-ui/react-dialog": "^1.1.4", 14 | "@radix-ui/react-label": "^2.1.1", 15 | "@radix-ui/react-select": "^2.1.5", 16 | "@radix-ui/react-slot": "^1.1.1", 17 | "@radix-ui/react-tabs": "^1.1.2", 18 | "axios": "^1.7.9", 19 | "class-variance-authority": "^0.7.1", 20 | "clsx": "^2.1.1", 21 | "firebase": "^11.2.0", 22 | "lucide-react": "^0.473.0", 23 | "next": "15.1.5", 24 | "react": "^19.0.0", 25 | "react-dom": "^19.0.0", 26 | "tailwind-merge": "^2.6.0", 27 | "tailwindcss-animate": "^1.0.7" 28 | }, 29 | "devDependencies": { 30 | "@types/node": "^20", 31 | "@types/react": "^19", 32 | "@types/react-dom": "^19", 33 | "postcss": "^8", 34 | "tailwindcss": "^3.4.1", 35 | "typescript": "^5" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /webapp/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /webapp/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webapp/public/globe.svg: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /webapp/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webapp/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webapp/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /webapp/src/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/genkit-ai/genkit-notebooklm/b0514f04e5cf43e03e900104ef4e456c4c085961/webapp/src/app/favicon.ico -------------------------------------------------------------------------------- /webapp/src/app/firebase.ts: -------------------------------------------------------------------------------- 1 | import { initializeApp, getApp } from "firebase/app"; 2 | import { getFirestore } from "firebase/firestore"; 3 | import { getFunctions, connectFunctionsEmulator } from "firebase/functions"; 4 | import dotenv from 'dotenv'; 5 | 6 | dotenv.config(); 7 | 8 | // Replace with your own configuration 9 | const firebaseConfig = { 10 | apiKey: "xxxxx", 11 | authDomain: "xxxxx.firebaseapp.com", 12 | projectId: "xxxxx", 13 | storageBucket: "xxxxx.firebasestorage.app", 14 | messagingSenderId: "xxxxx", 15 | appId: "xxxxx" 16 | }; 17 | 18 | export const bucketName = firebaseConfig.storageBucket; 19 | const app = initializeApp(firebaseConfig); 20 | export const db = getFirestore(app); 21 | export const functions = getFunctions(getApp()); 22 | 23 | if (process.env.NEXT_PUBLIC_USE_FUNCTIONS_EMULATOR === 'true') { 24 | connectFunctionsEmulator(functions, "127.0.0.1", 5001); 25 | } -------------------------------------------------------------------------------- /webapp/src/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | body { 6 | font-family: Arial, Helvetica, sans-serif; 7 | } 8 | 9 | @layer base { 10 | :root { 11 | --background: 0 0% 100%; 12 | --foreground: 0 0% 3.9%; 13 | --card: 0 0% 100%; 14 | --card-foreground: 0 0% 3.9%; 15 | --popover: 0 0% 100%; 16 | --popover-foreground: 0 0% 3.9%; 17 | --primary: 0 0% 9%; 18 | --primary-foreground: 0 0% 98%; 19 | --secondary: 0 0% 96.1%; 20 | --secondary-foreground: 0 0% 9%; 21 | --muted: 0 0% 96.1%; 22 | --muted-foreground: 0 0% 45.1%; 23 | --accent: 0 0% 96.1%; 24 | --accent-foreground: 0 0% 9%; 25 | --destructive: 0 84.2% 60.2%; 26 | --destructive-foreground: 0 0% 98%; 27 | --border: 0 0% 89.8%; 28 | --input: 0 0% 89.8%; 29 | --ring: 0 0% 3.9%; 30 | --chart-1: 12 76% 61%; 31 | --chart-2: 173 58% 39%; 32 | --chart-3: 197 37% 24%; 33 | --chart-4: 43 74% 66%; 34 | --chart-5: 27 87% 67%; 35 | --radius: 0.5rem; 36 | } 37 | .dark { 38 | --background: 0 0% 3.9%; 39 | --foreground: 0 0% 98%; 40 | --card: 0 0% 3.9%; 41 | --card-foreground: 0 0% 98%; 42 | --popover: 0 0% 3.9%; 43 | --popover-foreground: 0 0% 98%; 44 | --primary: 0 0% 98%; 45 | --primary-foreground: 0 0% 9%; 46 | --secondary: 0 0% 14.9%; 47 | --secondary-foreground: 0 0% 98%; 48 | --muted: 0 0% 14.9%; 49 | --muted-foreground: 0 0% 63.9%; 50 | --accent: 0 0% 14.9%; 51 | --accent-foreground: 0 0% 98%; 52 | 
--destructive: 0 62.8% 30.6%;
53 |     --destructive-foreground: 0 0% 98%;
54 |     --border: 0 0% 14.9%;
55 |     --input: 0 0% 14.9%;
56 |     --ring: 0 0% 83.1%;
57 |     --chart-1: 220 70% 50%;
58 |     --chart-2: 160 60% 45%;
59 |     --chart-3: 30 80% 55%;
60 |     --chart-4: 280 65% 60%;
61 |     --chart-5: 340 75% 55%;
62 |   }
63 | }
64 | 
65 | @layer base {
66 |   * {
67 |     @apply border-border;
68 |   }
69 |   body {
70 |     @apply bg-background text-foreground;
71 |   }
72 | }
73 | 
--------------------------------------------------------------------------------
/webapp/src/app/layout.tsx:
--------------------------------------------------------------------------------
 1 | import type { Metadata } from "next";
 2 | import { Geist, Geist_Mono } from "next/font/google";
 3 | import "./globals.css";
 4 | 
 5 | const geistSans = Geist({
 6 |   variable: "--font-geist-sans",
 7 |   subsets: ["latin"],
 8 | });
 9 | 
10 | const geistMono = Geist_Mono({
11 |   variable: "--font-geist-mono",
12 |   subsets: ["latin"],
13 | });
14 | 
15 | export const metadata: Metadata = {
16 |   title: "Create Next App",
17 |   description: "Generated by create next app",
18 | };
19 | 
20 | export default function RootLayout({
21 |   children,
22 | }: Readonly<{
23 |   children: React.ReactNode;
24 | }>) {
25 |   return (
26 |     <html lang="en">
27 |       <body
28 |         className={`${geistSans.variable} ${geistMono.variable} antialiased`}
29 |       >
30 |         {children}
31 |       </body>
32 |     </html>
33 |   );
34 | }
--------------------------------------------------------------------------------
/webapp/src/app/notebooks/[id]/NotebookDetail.tsx:
--------------------------------------------------------------------------------
 1 | "use client";
 2 | 
 3 | import { useState, useEffect } from "react";
 4 | import { db, bucketName } from "../../firebase";
 5 | import { doc, onSnapshot, collection, addDoc, getDocs } from "firebase/firestore";
 6 | import Link from "next/link";
 7 | import { Card, CardHeader, CardTitle, CardContent } from "@/components/ui/card";
 8 | import { Collapsible, CollapsibleContent, CollapsibleTrigger } from "@/components/ui/collapsible";
 9 | import { ChevronDown } from "lucide-react";
10 | import { Button } from "@/components/ui/button";
11 | import {
12 |   Dialog,
13 |   DialogContent,
14 |   DialogDescription,
15 |   DialogFooter,
16 |   DialogHeader,
17 |   DialogTitle,
18 |   DialogTrigger,
19 | } from "@/components/ui/dialog";
20 | import axios from "axios";
21 | import { getStorage, ref, getDownloadURL } from "firebase/storage";
22 | 
23 | 
24 | interface NotebookDetailClientProps {
25 |   id: string;
26 | }
27 | 
28 | const MAX_SOURCES = 300;
29 | 
30 | interface ScriptSection {
31 |   title: string;
32 |   content: string;
33 | }
34 | 
35 | interface GeneratePodcastResponse {
36 |   scriptSections: ScriptSection[];
37 |   storageUrl: string;
38 | }
39 | 
40 | interface PodcastJob {
41 |   status: string;
42 |   audioOutput?: {
43 |     storageUrl: string;
44 |   };
45 |   currentStep?: string;
46 | }
47 | 
48 | export function NotebookDetailClient({ id }: NotebookDetailClientProps) {
49 |   const [title, setTitle] = useState("");
50 |   const [sources, setSources] = useState<Array<{ id: string; title: string; content: string }>>([]);
51 |   const [pastedText, setPastedText] = useState("");
52 |   const [addSourceView, setAddSourceView] = useState<'main' | 'paste'>('main');
53 |   const [isGenerating, setIsGenerating] = useState(false);
54 |   const [audioUrl, setAudioUrl] = useState<string | null>(null);
55 |   const [jobId, setJobId] = useState<string | null>(null);
56 |   const [jobStatus, setJobStatus] = useState<PodcastJob | null>(null);
57 | 
58 | 
59 |   useEffect(() => {
60 |     if (!id || Array.isArray(id)) return;
61 | 
62 |     // Notebook document listener
63 |     const notebookDoc = doc(db, "notebooks", id);
64 |     const unsubscribeNotebook = 
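    // Realtime listener: onSnapshot delivers the current document contents
    // immediately, then fires again on every subsequent change until the
    // returned unsubscribe function is called (in the effect cleanup below).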
onSnapshot(notebookDoc, (snapshot) => { 65 | const data = snapshot.data(); 66 | if (data) { 67 | setTitle(data.title || ""); 68 | } 69 | }); 70 | 71 | // Sources collection listener 72 | const sourcesCollection = collection(db, "notebooks", id, "sources"); 73 | const unsubscribeSources = onSnapshot(sourcesCollection, (snapshot) => { 74 | const sourcesData = snapshot.docs.map(doc => ({ 75 | id: doc.id, 76 | ...doc.data() as { title: string; content: string } 77 | })); 78 | setSources(sourcesData); 79 | }); 80 | 81 | return () => { 82 | unsubscribeNotebook(); 83 | unsubscribeSources(); 84 | }; 85 | }, [id]); 86 | 87 | const handlePasteTextSubmit = async () => { 88 | if (!id || !pastedText.trim()) return; 89 | 90 | try { 91 | const sourcesCollection = collection(db, "notebooks", id, "sources"); 92 | await addDoc(sourcesCollection, { 93 | title: `Text Source ${new Date().toLocaleString()}`, 94 | content: pastedText, 95 | createdAt: new Date(), 96 | }); 97 | 98 | // Reset form and view 99 | setPastedText(""); 100 | setAddSourceView('main'); 101 | } catch (error) { 102 | console.error("Error adding pasted text source:", error); 103 | } 104 | }; 105 | 106 | const handleGenerateScriptV2 = async () => { 107 | try { 108 | setIsGenerating(true); 109 | setAudioUrl(null); // Reset audio URL when generating new content 110 | 111 | // Fetch sources from the database 112 | const sourcesCollection = collection(db, "notebooks", id, "sources"); 113 | const sourcesSnapshot = await getDocs(sourcesCollection); 114 | const sources = sourcesSnapshot.docs.map((doc) => doc.data().content); 115 | 116 | // TODO: Support various customizations in the UI directly 117 | const podcastOptions = { 118 | format: "roundtable", 119 | discussionStyle: "expert_panel", // Must be one of the predefined styles in the schema 120 | structure: "open_discussion", // Must be one of the predefined structures in the schema 121 | speakers: [ 122 | { 123 | name: "Dr. Mahsa Taheri", 124 | voiceId: "en-US-Journey-D", 125 | background: "AI Researcher at University of Hamburg" 126 | }, 127 | { 128 | name: "Sarah Chen", 129 | voiceId: "en-US-Journey-F", 130 | background: "Senior Tech Journalist at TechReview" 131 | } 132 | ], 133 | audioStorage: "audio", 134 | transcriptStorage: "transcript", 135 | bucketName, 136 | } 137 | const synthesisRequest = { 138 | input: sources, 139 | output: [{ type: 'podcast', options: podcastOptions }] 140 | }; 141 | 142 | const response = await axios.post('http://localhost:8080/api/synthesis', synthesisRequest); 143 | const data = response.data; 144 | 145 | if (data.status === 'success' && data.result.podcast.storageUrl) { 146 | // Get a reference to the storage location 147 | const storage = getStorage(); 148 | const audioRef = ref(storage, data.result.podcast.storageUrl); 149 | 150 | // Get the download URL 151 | const url = await getDownloadURL(audioRef); 152 | setAudioUrl(url); 153 | } 154 | } catch (error) { 155 | console.error("Failed to generate podcast:", error); 156 | } finally { 157 | setIsGenerating(false); 158 | } 159 | }; 160 | 161 | return ( 162 |
      {/* [The remaining JSX (source lines 163-217) was stripped to bare text in
          this dump; only the rendered strings survive. Recoverable structure: a
          header with a "Back to Notebooks" link and the notebook {title},
          followed by a "Sources" Card whose header opens an add-source Dialog.
          When addSourceView === 'paste', the dialog header reads "Paste copied
          text"; otherwise it reads "Add sources" with the description "Sources
          let NotebookLM base its responses on the information that matters most
          to you. (Examples: marketing plans, course reading, research notes,
          meeting transcripts, sales documents, etc.)". The dialog body branches
          on addSourceView a second time (presumably the paste form bound to
          pastedText and handlePasteTextSubmit) before the dump truncates
          mid-file at line 217.] */}