├── functions ├── tts │ ├── __init__.py │ ├── gui │ │ ├── __init__.py │ │ └── player.py │ ├── args.py │ ├── utils.py │ └── processing.py ├── scraping │ ├── web.py │ ├── documents.py │ └── reddit.py ├── search │ ├── google.py │ ├── brave.py │ ├── discovery.py │ └── api.py ├── utils.py ├── ai.py ├── config.py ├── args.py └── processing │ ├── summarization.py │ ├── youtube_descriptor.py │ └── report_generation.py ├── settings ├── music │ ├── intro │ │ └── Warrior_Intro.mp3 │ └── outro │ │ └── Warrior_Outro.mp3 ├── images │ ├── background │ │ └── Podcast_Background.png │ ├── guest │ │ ├── open │ │ │ ├── Reed_Gasp-removebg-preview.png │ │ │ ├── Reed_BigMouth-removebg-preview.png │ │ │ ├── Reed_Talking-removebg-preview.png │ │ │ └── Reed_Talkingv2-removebg-preview.png │ │ └── closed │ │ │ └── Reed_MouthClosed_Smiling-removebg-preview.png │ └── host │ │ ├── open │ │ ├── Dundell_Open-removebg-preview.png │ │ ├── Dundell_Surprised-removebg-preview.png │ │ └── Dundell_Talkingv2-removebg-preview.png │ │ └── closed │ │ └── Dundell_Mouth_Closedv2-removebg-preview.png ├── voices │ ├── leo.yaml │ ├── tara.yaml │ └── default.yaml ├── llm_settings │ └── example_ai_models.yml ├── env.example └── characters │ ├── host.yml │ └── guest.yml ├── research └── Example_Docs_Folder │ ├── Mabinogi F2P Reforges Guide_.docx │ ├── Weekly Dungeon Vouchers Guide-.docx │ ├── Fynni Gems Passive Income Guide_.docx │ ├── Mabinogi Farming Lord_Abyss Passes.docx │ └── Mabinogi Adventure Seals Weekly Guide_.docx ├── .gitignore ├── requirements_host.txt ├── run_control_panel.sh ├── templates ├── history.html ├── settings.html ├── main_dashboard.html ├── podcast_builder_form.html └── script_builder_form.html ├── Installer_Windows.bat ├── run_control_panel.bat ├── README.md ├── installation_readme.md ├── LICENSE └── static └── main.css /functions/tts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /functions/tts/gui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /settings/music/intro/Warrior_Intro.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/music/intro/Warrior_Intro.mp3 -------------------------------------------------------------------------------- /settings/music/outro/Warrior_Outro.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/music/outro/Warrior_Outro.mp3 -------------------------------------------------------------------------------- /settings/images/background/Podcast_Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/background/Podcast_Background.png -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_Gasp-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_Gasp-removebg-preview.png -------------------------------------------------------------------------------- 
/settings/images/host/open/Dundell_Open-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/open/Dundell_Open-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_BigMouth-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_BigMouth-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_Talking-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_Talking-removebg-preview.png -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Mabinogi F2P Reforges Guide_.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Mabinogi F2P Reforges Guide_.docx -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Weekly Dungeon Vouchers Guide-.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Weekly Dungeon Vouchers Guide-.docx -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_Talkingv2-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_Talkingv2-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/host/open/Dundell_Surprised-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/open/Dundell_Surprised-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/host/open/Dundell_Talkingv2-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/open/Dundell_Talkingv2-removebg-preview.png -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Fynni Gems Passive Income Guide_.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Fynni Gems Passive Income Guide_.docx -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Mabinogi Farming Lord_Abyss Passes.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Mabinogi Farming Lord_Abyss Passes.docx 
-------------------------------------------------------------------------------- /settings/images/host/closed/Dundell_Mouth_Closedv2-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/closed/Dundell_Mouth_Closedv2-removebg-preview.png -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Mabinogi Adventure Seals Weekly Guide_.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Mabinogi Adventure Seals Weekly Guide_.docx -------------------------------------------------------------------------------- /settings/images/guest/closed/Reed_MouthClosed_Smiling-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/closed/Reed_MouthClosed_Smiling-removebg-preview.png -------------------------------------------------------------------------------- /settings/voices/leo.yaml: -------------------------------------------------------------------------------- 1 | # settings/voices/leo.yaml 2 | gain_factor: 1.0 3 | trim_end_ms: 100 4 | nr_level: 0 5 | compress_thresh: 0.001 6 | compress_ratio: 1 7 | norm_frame_len: 10 8 | norm_gauss_size: 3 9 | deesser_freq: 5000 -------------------------------------------------------------------------------- /settings/voices/tara.yaml: -------------------------------------------------------------------------------- 1 | # settings/voices/tara.yaml 2 | gain_factor: 1 3 | trim_end_ms: 120 4 | nr_level: 35 5 | compress_thresh: 0.03 6 | compress_ratio: 2 7 | norm_frame_len: 20 8 | norm_gauss_size: 15 9 | deesser_freq: 5000 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | settings/llm_settings/ai_models.yml 3 | /orpheus_tts_setup/* 4 | functions/__pycache__/* 5 | */*/__pycache__/* 6 | /outputs/* 7 | /research/* 8 | /uploads/* 9 | */*/__pycache__/* 10 | */*/*/__pycache__/* 11 | temp_audio/* 12 | 13 | -------------------------------------------------------------------------------- /requirements_host.txt: -------------------------------------------------------------------------------- 1 | requests 2 | PyYAML 3 | python-dotenv 4 | beautifulsoup4 5 | newspaper4k 6 | PyPDF2 7 | python-docx 8 | selenium 9 | soundfile 10 | numpy 11 | Pillow 12 | nltk 13 | pydub 14 | matplotlib 15 | scipy 16 | pygame 17 | lxml_html_clean 18 | flask 19 | moviepy==1.0.3 20 | # audioop-lts not available for Python 3.12 on Windows - using built-in audioop for now -------------------------------------------------------------------------------- /settings/voices/default.yaml: -------------------------------------------------------------------------------- 1 | # settings/voices/default.yaml 2 | gain_factor: 1.0 3 | trim_end_ms: 0 4 | nr_level: 0 5 | compress_thresh: 1.0 # Threshold effectively disables compression (1.0 = 0dBFS) 6 | compress_ratio: 1 # Ratio 1 means no compression 7 | norm_frame_len: 10 # Lowest default value used 8 | norm_gauss_size: 3 # Lowest default value used (must be odd) 9 | deesser_freq: 3000 # Lowest allowed frequency 
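The voice profiles above are plain YAML dictionaries of audio-processing parameters (the project's actual processing code lives under `functions/tts/` and is not shown in this dump). As a rough sketch of how such a profile could be loaded and its two simplest settings applied — assuming `pydub`, which `requirements_host.txt` already lists; the function below is illustrative, not the project's implementation:

```python
import math
import yaml
from pydub import AudioSegment  # pydub is listed in requirements_host.txt

def apply_voice_profile(audio_path, profile_path):
    """Illustrative sketch: load a voice YAML and apply its gain/trim settings."""
    with open(profile_path, "r", encoding="utf-8") as f:
        profile = yaml.safe_load(f)

    audio = AudioSegment.from_file(audio_path)

    # gain_factor is a linear multiplier; pydub applies gain in dB.
    gain_factor = float(profile.get("gain_factor", 1.0))
    if gain_factor > 0 and gain_factor != 1.0:
        audio = audio.apply_gain(20 * math.log10(gain_factor))

    # trim_end_ms shaves a short tail off the end of the segment.
    trim_end_ms = int(profile.get("trim_end_ms", 0))
    if 0 < trim_end_ms < len(audio):  # len() of an AudioSegment is in milliseconds
        audio = audio[:-trim_end_ms]

    return audio
```

The remaining keys (`nr_level`, `compress_thresh`, `compress_ratio`, `norm_frame_len`, `norm_gauss_size`, `deesser_freq`) would drive noise reduction, compression, normalization, and de-essing stages that this sketch does not attempt to reproduce.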
-------------------------------------------------------------------------------- /settings/llm_settings/example_ai_models.yml: -------------------------------------------------------------------------------- 1 | # Model configurations for the AI Podcast Generator 2 | # Define different models and their parameters for /chat/completions API calls. 3 | 4 | # Configuration for the default model accessed via the current endpoint 5 | default_model: 6 | api_endpoint: "" 7 | api_key: "sk1-example" 8 | model: "QwQ-32B_Example" # Example: Replace with the actual default model if known 9 | temperature: 0.7 10 | 11 | # Configuration for Gemini 2.0 Flash (Experimental) 12 | gemini_flash: 13 | api_endpoint: "https://generativelanguage.googleapis.com/v1beta/openai/" 14 | api_key: "" 15 | model: "gemini-2.5-flash-preview-05-20" 16 | max_tokens: 65536 17 | # top_p: 0.95 # Optional parameter -------------------------------------------------------------------------------- /settings/env.example: -------------------------------------------------------------------------------- 1 | # .env file for AI Podcast Generator 2 | 3 | # --- Model Selection --- 4 | # Specifies the default model configuration to use from ai_models.yml 5 | # Options: default_model, gemini_flash (or others defined in ai_models.yml) 6 | DEFAULT_MODEL_CONFIG="default_model" # Change this to 'gemini_flash' to use Gemini by default 7 | 8 | # --- Search APIs --- 9 | # Google Custom Search API Credentials 10 | # 1. Get API Key from Google Cloud Console (Credentials page) 11 | GOOGLE_API_KEY="" 12 | # 2. Get Search Engine ID (cx) from Programmable Search Engine control panel (make sure "Search entire web" is ON) 13 | GOOGLE_CSE_ID="" 14 | 15 | # Brave Search API Key (Get from https://api.search.brave.com/) 16 | BRAVE_API_KEY="" 17 | -------------------------------------------------------------------------------- /run_control_panel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to activate virtual environment and run control panel app 4 | # Navigate to the script's directory 5 | cd "$(dirname "$0")" 6 | 7 | # Check if virtual environment exists 8 | if [ ! -d "host_venv" ]; then 9 | echo "Error: Virtual environment 'host_venv' not found in current directory." 10 | echo "Please ensure you're running this script from the project root directory." 11 | exit 1 12 | fi 13 | 14 | # Check if control_panel_app.py exists 15 | if [ ! -f "control_panel_app.py" ]; then 16 | echo "Error: control_panel_app.py not found in current directory." 17 | echo "Please ensure you're running this script from the project root directory." 18 | exit 1 19 | fi 20 | 21 | echo "Activating virtual environment..." 22 | source host_venv/bin/activate 23 | 24 | # Check if activation was successful 25 | if [ $? -ne 0 ]; then 26 | echo "Error: Failed to activate virtual environment." 27 | exit 1 28 | fi 29 | 30 | echo "Virtual environment activated successfully." 31 | echo "Starting Control Panel App..." 32 | echo "================================================================" 33 | 34 | # Function to open URL in default browser 35 | open_url() { 36 | local url="http://127.0.0.1:5000" 37 | echo "Attempting to open $url in your default browser..." 
38 | case "$(uname -s)" in 39 | Linux*) xdg-open "$url" >/dev/null 2>&1 & ;; 40 | Darwin*) open "$url" & ;; 41 | *) echo "Please open your browser and navigate to $url" ;; 42 | esac 43 | } 44 | 45 | # Open the browser in the background after a short delay 46 | (sleep 2 && open_url) & 47 | 48 | # Run the control panel app in the foreground 49 | python control_panel_app.py -------------------------------------------------------------------------------- /templates/history.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Output History - Ecne AI Podcaster Control Panel 7 | 8 | 9 | 10 |
11 | Output History
12 | Browse and manage all your generated scripts, audio, and videos.
13 | 14 | Back to Dashboard
15 | 16 | {% if output_files %}
17 | 18 | {% for file in output_files %}
19 | 20 | {{ file.name }}
21 | Type: {{ file.type | capitalize }}
22 | Path: {{ file.path }}
23 | Size: {{ "%.2f" | format(file.size / 1024 / 1024) }} MB
24 | Last Modified: {{ file.modified }}
25 | Download
26 | {% if file.type == 'video' %} 27 | View/Play Video
28 | {% elif file.type == 'script' %} 29 | View Script
30 | {% endif %} 31 |
32 | {% endfor %} 33 |
34 | {% else %}
35 | No generated output files found yet.
36 | {% endif %} 37 |
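For context, the `output_files` variable iterated in this template is a list of dictionaries exposing `name`, `type`, `path`, `size` (in bytes), and `modified` fields. The actual view lives in `control_panel_app.py`, which is not included in this dump; a minimal sketch of the kind of Flask route that could feed the template (the route name, output directory, and file-type mapping are assumptions) would look like:

```python
import os
import datetime
from flask import Flask, render_template

app = Flask(__name__)

@app.route("/history")
def history():
    """Illustrative sketch of a view that populates history.html."""
    output_files = []
    outputs_dir = "outputs"  # assumed location of generated artifacts
    for root, _dirs, files in os.walk(outputs_dir):
        for name in files:
            path = os.path.join(root, name)
            ext = os.path.splitext(name)[1].lower()
            file_type = "video" if ext == ".mp4" else "script" if ext in (".txt", ".md") else "other"
            info = os.stat(path)
            output_files.append({
                "name": name,
                "type": file_type,
                "path": path,
                "size": info.st_size,  # bytes; the template converts this to MB
                "modified": datetime.datetime.fromtimestamp(info.st_mtime).strftime("%Y-%m-%d %H:%M:%S"),
            })
    return render_template("history.html", output_files=output_files)
```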
38 | 39 | -------------------------------------------------------------------------------- /settings/characters/host.yml: -------------------------------------------------------------------------------- 1 | # Character Profile: Host 2 | name: "Eric Dundell" 3 | podcast_name: "Dundell's Cyberspace" 4 | profession: "Podcast Host / Tech Enthusiast" 5 | background: "Former tech journalist, always curious about the 'why' behind technology. Passionate about making complex topics accessible." 6 | education: "B.A. in Communications" 7 | personality_traits: 8 | - Relaxed and conversational 9 | - Deeply inquisitive - always asks follow-up questions 10 | - Empathetic listener who breaks down complex topics 11 | - Guides guests to explain terms and concepts 12 | - Makes topics accessible through analogies 13 | - Occasionally injects light humor 14 | speaking_style: 15 | - Always starts topics with "explain like I'm new to this" 16 | - Uses multiple follow-up questions per topic 17 | - Asks for real-world examples and implications 18 | - Ensures terms are explained for listeners 19 | - Explores each topic thoroughly before moving on 20 | - Creates smooth transitions between topics 21 | interaction_patterns: 22 | - Starts each topic with a basic understanding question 23 | - Follows up on technical terms mentioned by guest 24 | - Asks for clarification on complex concepts 25 | - Connects topics to previous discussions 26 | - Summarizes before changing topics 27 | example_phrases: 28 | - "For our listeners who are new to this, can you explain what [term] means?" 29 | - "That's fascinating! Let's break that down a bit. First, what exactly is...?" 30 | - "You mentioned [term]. Could you explain that in simpler terms?" 31 | - "So, if I'm understanding correctly, you're saying that..." 32 | - "That's quite technical. How would this affect everyday users?" 33 | - "Before we move on, could you give us a real-world example?" 34 | - "Coming back to what you said about [previous point]..." 35 | - "Let's explore that aspect a bit more. How does it relate to...?" -------------------------------------------------------------------------------- /settings/characters/guest.yml: -------------------------------------------------------------------------------- 1 | # Character Profile: Expert Guest 2 | name: "Dr. Evelyn Reed" # Can be overridden based on topic if needed 3 | podcast_name: "Dundell's Cyberspace" 4 | profession: "Variable - AI Researcher / Ethicist / Specific Field Expert" 5 | background: "Deeply knowledgeable in their field (determined by the podcast topic). Often involved in research or practical application. Enjoys sharing knowledge." 6 | education: "Ph.D. 
in relevant field (e.g., Computer Science, Philosophy, Biology)" 7 | personality_traits: 8 | - Deep expert knowledge with teaching mindset 9 | - Welcomes and anticipates follow-up questions 10 | - Breaks complex topics into digestible parts 11 | - Connects different concepts naturally 12 | - Patient with repeated clarification requests 13 | - Enthusiastic about sharing knowledge 14 | speaking_style: 15 | - Starts with high-level overview before details 16 | - Defines technical terms as they're introduced 17 | - Uses analogies for complex concepts 18 | - Builds answers in clear, logical steps 19 | - References previous points to show connections 20 | - Maintains engaging, conversational tone 21 | response_patterns: 22 | - Gives brief overview before detailed explanation 23 | - Anticipates and defines technical terms 24 | - Provides examples after explaining concepts 25 | - Welcomes interruptions for clarification 26 | - Links back to earlier discussions 27 | - Acknowledges host's analogies and builds on them 28 | example_phrases: 29 | - "Let me start with a simple overview..." 30 | - "In basic terms, [concept] means... More specifically..." 31 | - "When I say [technical term], I mean..." 32 | - "Think of it like... [analogy]" 33 | - "This connects to what we discussed earlier about..." 34 | - "There are three key aspects here. First..." 35 | - "Your analogy is spot-on, and to build on that..." 36 | - "Let me break this down step by step..." 37 | - "The practical impact of this is..." -------------------------------------------------------------------------------- /Installer_Windows.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enabledelayedexpansion 3 | 4 | :: Get the directory where this batch file is located 5 | set "BATCH_DIR=%~dp0" 6 | 7 | :: Change to the batch file's directory to ensure we're in the right location 8 | cd /d "%BATCH_DIR%" 9 | 10 | :: Check for Administrator privileges 11 | >nul 2>&1 "%SYSTEMROOT%\system32\cacls.exe" "%SYSTEMROOT%\system32\config\system" 12 | 13 | if '%errorlevel%' NEQ '0' ( 14 | echo. 15 | echo ================================ 16 | echo ADMINISTRATOR REQUIRED 17 | echo ================================ 18 | echo. 19 | echo This installer must be run with Administrator privileges. 20 | echo. 21 | echo Right-click on this batch file and select "Run as administrator" 22 | echo. 23 | echo Press any key to close... 24 | pause >nul 25 | exit /b 1 26 | ) 27 | 28 | :: If we reach here, we have admin privileges 29 | echo Running Orpheus TTS Windows Installer with Administrator privileges... 30 | echo Current directory: "%CD%" 31 | echo. 32 | 33 | :: Check if PowerShell script exists 34 | if not exist "settings\install\Installer.ps1" ( 35 | echo ERROR: Installer.ps1 not found in settings\install\ directory. 36 | echo Current directory: "%CD%" 37 | echo Batch file directory: "%BATCH_DIR%" 38 | echo Please ensure the installer files are properly organized. 39 | echo. 40 | echo Press any key to close... 41 | pause >nul 42 | exit /b 1 43 | ) 44 | 45 | :: Run the PowerShell installer with execution policy bypass from the correct directory 46 | powershell.exe -ExecutionPolicy Bypass -File ".\settings\install\Installer.ps1" 47 | 48 | :: Check if PowerShell script completed successfully 49 | if '%errorlevel%' NEQ '0' ( 50 | echo. 51 | echo Installation completed with errors. Check the output above for details. 52 | echo. 53 | echo Press any key to close... 
54 | pause >nul 55 | exit /b %errorlevel% 56 | ) 57 | 58 | echo. 59 | echo Installation completed successfully! 60 | echo Press any key to close... 61 | pause >nul 62 | -------------------------------------------------------------------------------- /run_control_panel.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Batch script to activate virtual environment and run control panel app 3 | REM This script can be double-clicked in Windows File Explorer 4 | 5 | echo ================================================================ 6 | echo Ecne AI Podcaster - Control Panel Launcher 7 | echo ================================================================ 8 | echo. 9 | 10 | REM Navigate to the script's directory 11 | cd /d "%~dp0" 12 | 13 | REM Check if virtual environment exists 14 | if not exist "host_venv" ( 15 | echo Error: Virtual environment 'host_venv' not found in current directory. 16 | echo Please ensure you're running this script from the project root directory. 17 | echo Run the Installer.ps1 script first to set up the environment. 18 | echo. 19 | pause 20 | exit /b 1 21 | ) 22 | 23 | REM Check if control_panel_app.py exists 24 | if not exist "control_panel_app.py" ( 25 | echo Error: control_panel_app.py not found in current directory. 26 | echo Please ensure you're running this script from the project root directory. 27 | echo. 28 | pause 29 | exit /b 1 30 | ) 31 | 32 | REM Check if the Python executable exists in the virtual environment 33 | if not exist "host_venv\Scripts\python.exe" ( 34 | echo Error: Python executable not found in virtual environment. 35 | echo Please ensure the virtual environment was created properly. 36 | echo You may need to recreate it by running the Installer.ps1 script. 37 | echo. 38 | pause 39 | exit /b 1 40 | ) 41 | 42 | echo Activating virtual environment... 43 | echo Virtual environment found and ready. 44 | echo. 45 | echo Starting Control Panel App... 46 | echo ================================================================ 47 | echo The Control Panel will open in your default web browser. 48 | echo If it doesn't open automatically, navigate to: http://localhost:5000 49 | echo. 50 | echo IMPORTANT: Keep this window open while using the Control Panel. 51 | echo To stop the server, close this window or press Ctrl+C. 52 | echo ================================================================ 53 | echo. 54 | 55 | REM Run the control panel app using the virtual environment's Python 56 | call "host_venv\Scripts\activate.bat" 57 | python "control_panel_app.py" 58 | 59 | REM If we reach here, the app has stopped 60 | echo. 61 | echo Control Panel App has stopped. 62 | echo You can close this window now. 
63 | pause -------------------------------------------------------------------------------- /functions/scraping/web.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | from newspaper import Article, ArticleException # Using newspaper4k for better web scraping 5 | 6 | from ..utils import log_to_file, USER_AGENTS # Import utilities including USER_AGENTS 7 | 8 | def scrape_website_url(url): 9 | """Scrapes content from a single website URL using newspaper4k.""" 10 | print(f" - Scraping URL (Newspaper4k): {url}") 11 | log_to_file(f"Scraping website URL: {url}") 12 | try: 13 | headers = {'User-Agent': random.choice(USER_AGENTS)} 14 | article = Article(url, request_headers=headers, fetch_images=False) 15 | article.download() 16 | # Handle potential download errors before parsing 17 | if article.download_state != 2: # 2 means success 18 | raise ArticleException(f"Download failed with state {article.download_state}") 19 | article.parse() 20 | 21 | title = article.title 22 | text = article.text 23 | publish_date = article.publish_date 24 | 25 | if text and len(text) > 150: # Basic quality check 26 | content = f"Source URL: {url}\n" 27 | if title: content += f"Title: {title}\n" 28 | if publish_date: content += f"Published: {publish_date.strftime('%Y-%m-%d') if publish_date else 'N/A'}\n" 29 | content += f"\nBody:\n{text}" 30 | print(f" - Success: Scraped content ({len(text)} chars).") 31 | log_to_file(f"Website scrape success: {url} ({len(text)} chars)") 32 | return content.strip() 33 | elif text: 34 | print(" - Warning: Scraped text seems too short, skipping.") 35 | log_to_file(f"Website scrape warning (too short): {url} ({len(text)} chars)") 36 | return None 37 | else: 38 | print(" - Warning: Newspaper4k found no text.") 39 | log_to_file(f"Website scrape warning (no text): {url}") 40 | return None 41 | 42 | except ArticleException as e: # Assuming newspaper4k still uses ArticleException 43 | print(f" - Error (Newspaper4k) scraping {url}: {e}") 44 | log_to_file(f"Website scrape ArticleException: {url} - {e}") 45 | return None 46 | except requests.exceptions.RequestException as e: 47 | print(f" - Error (Request) fetching {url}: {e}") 48 | log_to_file(f"Website scrape RequestException: {url} - {e}") 49 | return None 50 | except Exception as e: 51 | print(f" - Unexpected error scraping {url}: {e}") 52 | log_to_file(f"Website scrape Unexpected Error: {url} - {e}") 53 | return None 54 | finally: 55 | time.sleep(random.uniform(1.5, 3)) # Delay between website scrapes -------------------------------------------------------------------------------- /functions/search/google.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random # For random delays 4 | 5 | from ..utils import log_to_file # Import log_to_file from the utils module 6 | 7 | def search_google_api(query, config, num_results, from_date=None, to_date=None): 8 | """Performs search using Google Custom Search API.""" 9 | urls = [] 10 | api_key = config.get("google_api_key") 11 | cse_id = config.get("google_cse_id") 12 | if not api_key or not cse_id: 13 | log_to_file("Google API search skipped: API Key or CSE ID missing.") 14 | print(" - Google API search skipped: API Key or CSE ID missing.") 15 | return None # Indicate skipped/failed 16 | 17 | search_url = "https://www.googleapis.com/customsearch/v1" 18 | effective_query = query 19 | if from_date: effective_query += f" 
after:{from_date}" 20 | if to_date: effective_query += f" before:{to_date}" 21 | 22 | print(f" - Searching Google API: '{effective_query}' (Num: {num_results})") 23 | log_to_file(f"Google API Search: Query='{effective_query}', Num={num_results}") 24 | 25 | params = {'key': api_key, 'cx': cse_id, 'q': effective_query, 'num': min(num_results, 10)} # Google max 10 per req 26 | 27 | try: 28 | response = requests.get(search_url, params=params, timeout=20) 29 | response.raise_for_status() 30 | search_data = response.json() 31 | 32 | if 'items' in search_data: 33 | urls = [item['link'] for item in search_data['items'] if 'link' in item] 34 | print(f" - Google Found: {len(urls)} results.") 35 | log_to_file(f"Google API Success: Found {len(urls)} URLs.") 36 | else: 37 | print(" - Google Found: 0 results.") 38 | log_to_file("Google API Success: No items found in response.") 39 | 40 | # Check for quota error explicitly 41 | if 'error' in search_data and search_data['error'].get('code') == 429: 42 | print(" - !! Google API Quota limit likely reached !!") 43 | log_to_file("Google API Error: Quota limit reached (429 in response body).") 44 | return 'quota_error' 45 | return urls 46 | 47 | except requests.exceptions.HTTPError as e: 48 | print(f" - Error calling Google API: {e}") 49 | log_to_file(f"Google API HTTP Error: {e}") 50 | if e.response.status_code == 429: 51 | print(" - !! Google API Quota limit likely reached (HTTP 429) !!") 52 | log_to_file("Google API Error: Quota limit reached (HTTP 429).") 53 | return 'quota_error' 54 | return None # General HTTP error 55 | except requests.exceptions.RequestException as e: 56 | print(f" - Error calling Google API: {e}") 57 | log_to_file(f"Google API Request Error: {e}") 58 | return None 59 | except Exception as e: 60 | print(f" - Unexpected error during Google API search: {e}") 61 | log_to_file(f"Google API Unexpected Error: {e}") 62 | return None 63 | finally: 64 | time.sleep(random.uniform(1, 2)) # Delay -------------------------------------------------------------------------------- /functions/search/brave.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random # For random delays 4 | import urllib.parse 5 | import datetime 6 | 7 | from ..utils import log_to_file # Import log_to_file from the utils module 8 | 9 | def search_brave_api(query, config, num_results, from_date=None, to_date=None): 10 | """Performs search using Brave Search API.""" 11 | urls = [] 12 | api_key = config.get("brave_api_key") 13 | if not api_key: 14 | log_to_file("Brave API search skipped: API Key missing.") 15 | print(" - Brave API search skipped: API Key missing.") 16 | return None 17 | 18 | search_url = "https://api.search.brave.com/res/v1/web/search" 19 | headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": api_key} 20 | effective_query = query 21 | freshness_param = None 22 | 23 | # Brave uses 'freshness=pd:YYYYMMDD,YYYYMMDD' 24 | if from_date: 25 | try: 26 | from_dt = datetime.datetime.strptime(from_date, '%Y-%m-%d') 27 | freshness_start = from_dt.strftime('%Y%m%d') 28 | freshness_end = "" 29 | if to_date: 30 | to_dt = datetime.datetime.strptime(to_date, '%Y-%m-%d') 31 | freshness_end = to_dt.strftime('%Y%m%d') 32 | freshness_param = f"pd:{freshness_start},{freshness_end}" 33 | except ValueError: 34 | print(f" - Warning: Invalid date format for Brave freshness '{from_date}' or '{to_date}'. 
Skipping date filter.") 35 | log_to_file(f"Brave API Warning: Invalid date format '{from_date}'/'{to_date}' for freshness.") 36 | 37 | print(f" - Searching Brave API: '{effective_query}' (Num: {num_results})") 38 | log_to_file(f"Brave API Search: Query='{effective_query}', Num={num_results}, Freshness='{freshness_param}'") 39 | 40 | params = {'q': effective_query, 'count': num_results} 41 | if freshness_param: params['freshness'] = freshness_param 42 | 43 | try: 44 | response = requests.get(search_url, headers=headers, params=params, timeout=20) 45 | response.raise_for_status() 46 | search_data = response.json() 47 | 48 | if 'web' in search_data and 'results' in search_data['web']: 49 | urls = [item['url'] for item in search_data['web']['results'] if 'url' in item] 50 | print(f" - Brave Found: {len(urls)} results.") 51 | log_to_file(f"Brave API Success: Found {len(urls)} URLs.") 52 | else: 53 | print(" - Brave Found: 0 results.") 54 | log_to_file(f"Brave API Success: No web/results found in response. Structure: {search_data.keys()}") 55 | return urls 56 | 57 | except requests.exceptions.HTTPError as e: 58 | print(f" - Error calling Brave API: {e}") 59 | log_to_file(f"Brave API HTTP Error: {e}") 60 | if e.response.status_code == 429: 61 | print(" - !! Brave API Quota limit likely reached (HTTP 429) !!") 62 | log_to_file("Brave API Error: Quota limit reached (HTTP 429).") 63 | return 'quota_error' 64 | return None 65 | except requests.exceptions.RequestException as e: 66 | print(f" - Error calling Brave API: {e}") 67 | log_to_file(f"Brave API Request Error: {e}") 68 | return None 69 | except Exception as e: 70 | print(f" - Unexpected error during Brave API search: {e}") 71 | log_to_file(f"Brave API Unexpected Error: {e}") 72 | return None 73 | finally: 74 | time.sleep(random.uniform(1, 2)) # Delay -------------------------------------------------------------------------------- /functions/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | import re 4 | import random # Required for USER_AGENTS 5 | 6 | # Global variables to hold the current run's archive directory and log file handler 7 | run_archive_dir = None 8 | log_file_path = None 9 | log_file_handler = None 10 | 11 | def set_run_archive_dir(path): 12 | """Sets the global run_archive_dir and initializes the log file path.""" 13 | global run_archive_dir, log_file_path, log_file_handler 14 | # Close any existing log file handler before changing paths 15 | if log_file_handler: 16 | log_file_handler.close() 17 | log_file_handler = None 18 | 19 | run_archive_dir = path 20 | if run_archive_dir: 21 | log_file_path = os.path.join(run_archive_dir, f"ai_podcast_run_{datetime.datetime.now().strftime('%Y%m%d')}.log") 22 | try: 23 | # Open the file in append mode and keep the handler 24 | log_file_handler = open(log_file_path, 'a', encoding='utf-8') 25 | except IOError as e: 26 | print(f"Fatal: Could not open log file for writing at {log_file_path}: {e}") 27 | log_file_handler = None 28 | else: 29 | log_file_path = None 30 | 31 | # User agents for requests/scraping 32 | USER_AGENTS = [ 33 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 34 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15', 35 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0', 36 | ] 37 | 38 | def log_to_file(message): 39 | """Appends 
a message to the log file using the global file handler.""" 40 | if not log_file_handler: 41 | print(f"Warning: Log file handler not available. Could not log: {message}") 42 | return 43 | 44 | try: 45 | timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 46 | log_file_handler.write(f"[{timestamp}] {message}\n") 47 | log_file_handler.flush() # Ensure it's written to disk immediately 48 | except Exception as e: 49 | print(f"Warning: Could not write to log file {log_file_path}: {e}") 50 | 51 | def close_log_file(): 52 | """Closes the global log file handler.""" 53 | global log_file_handler 54 | if log_file_handler: 55 | try: 56 | log_file_handler.close() 57 | log_file_handler = None 58 | print("Log file closed.") 59 | except Exception as e: 60 | print(f"Warning: Error closing log file: {e}") 61 | 62 | def clean_thinking_tags(text): 63 | """Recursively remove all content within <think>...</think> tags.""" 64 | if text is None: return "" 65 | prev_text = "" 66 | current_text = str(text) # Ensure it's a string 67 | # Keep cleaning until no more changes are made (handles nested tags) 68 | while prev_text != current_text: 69 | prev_text = current_text 70 | current_text = re.sub(r'<think>.*?</think>', '', prev_text, flags=re.IGNORECASE | re.DOTALL) 71 | return current_text.strip() 72 | 73 | def parse_ai_tool_response(response_text, tool_tag): 74 | """ 75 | Parses content within the *last* occurrence of specific <tool_tag>...</tool_tag> markers 76 | after cleaning thinking tags. 77 | """ 78 | cleaned_text = clean_thinking_tags(response_text) 79 | if not cleaned_text: return "" 80 | 81 | # Find the last opening tag (case-insensitive) 82 | open_tag = f'<{tool_tag}>' 83 | close_tag = f'</{tool_tag}>' 84 | last_open_tag_index = cleaned_text.lower().rfind(open_tag.lower()) # Case-insensitive find 85 | 86 | if last_open_tag_index != -1: 87 | # Find the first closing tag *after* the last opening tag (case-insensitive) 88 | # Search starting from the position after the last open tag 89 | search_start_index = last_open_tag_index + len(open_tag) 90 | first_close_tag_index_after_last_open = cleaned_text.lower().find(close_tag.lower(), search_start_index) # Case-insensitive find 91 | 92 | if first_close_tag_index_after_last_open != -1: 93 | # Extract content between the tags 94 | start_content_index = last_open_tag_index + len(open_tag) 95 | content = cleaned_text[start_content_index:first_close_tag_index_after_last_open] 96 | return content.strip() 97 | else: 98 | # Found opening tag but no corresponding closing tag afterwards 99 | log_msg = f"Warning: Found last '<{tool_tag}>' but no subsequent '</{tool_tag}>'. Returning full cleaned response." 100 | print(f"\n{log_msg}") 101 | log_to_file(f"{log_msg}\nResponse was:\n{cleaned_text}") 102 | return cleaned_text # Fallback 103 | else: 104 | # No opening tag found at all 105 | log_msg = f"Warning: Tool tag '<{tool_tag}>' not found in AI response. Returning full cleaned response." 106 | print(f"\n{log_msg}") 107 | log_to_file(f"{log_msg}\nResponse was:\n{cleaned_text}") 108 | return cleaned_text # Fallback -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Update Summary for 7/18/2025: 2 | 3 | * **Podcast Builder Enhancements:** 4 | * Fixed padding issues in GUI, ensuring consistent spacing between speakers (750ms) and same-speaker segments (100ms). 5 | * Added "Save and Close" for progress saving and a resume feature for editing completed podcasts.
6 | * Resolved "Missing Audio" errors, enabling segment regeneration for corrupt audio. 7 | * **Audio Quality & Trimming:** 8 | * Addressed some of the split-second audio glitches at segment ends, exploring increased trimming (10-150ms) and new viewing tools. 9 | * **Script Builder Improvements:** 10 | * Defaulted YouTube description building and streamlined settings into a dropdown menu. 11 | * Introduced "Easy mode" for automated script topic, keywords, and guidance. 12 | * **Project Organization:** 13 | * Reworked output folders for better tracking of scripts, archived, and finalized podcast videos. 14 | * **Future Work:** 15 | * Simplifying installation scripts (no sudo, transparent `installation_readme`). 16 | * Creating documentation for podcast customization (characters, images, music, voices). 17 | * Researching new TTS services with Docker FastAPI (e.g., Chatterbox) with low VRAM requirements (max 6GB). 18 | 19 | # Ecne AI Podcaster 20 | 21 | Automated AI podcast generation from topic/keywords to final video. Leverages web research, LLMs for scripting, and TTS for audio synthesis. 22 | 23 | ![image](https://github.com/user-attachments/assets/ca081333-1955-4419-a09c-8ec79a11ad38) 24 | 25 | 26 |
27 | [screenshot: Screenshot_20250526_230535] [screenshot: Screenshot_20250526_230602] 28 |
29 | 30 | ![ecneAI_Podcast](https://github.com/user-attachments/assets/8ee380bd-aea0-45f1-8651-40784778b7ee) 31 | 32 | ## ✨ Features 33 | 34 | - **Web Control Panel:** Easy-to-use browser interface for the complete podcast creation workflow 35 | - **Script Generation:** AI-powered research and script writing from topics, keywords, or documents 36 | - **Podcast Production:** High-quality TTS with Orpheus and video assembly 37 | - **Docker Integration:** Automated TTS backend setup via Docker 38 | - **Multi-Voice Support:** Distinct host and guest voices with audio processing 39 | 40 | --- 41 | 42 | ## 🚀 Quick Start 43 | 44 | ### 1. Installation 45 | Note: Installer.sh made for Linux OS's (Arch Linux tested). Windows installer Pending. 46 | ```bash 47 | git clone https://github.com/ETomberg391/Ecne-AI-Podcaster 48 | cd Ecne-AI-Podcaster 49 | chmod +x Installer.sh 50 | ./Installer.sh 51 | ``` 52 | 53 | ### 2. Start the WebGUI Control Panel 54 | ```bash 55 | ./run_control_panel.sh 56 | ``` 57 | 58 | ### 3. Access the Web Interface 59 | Open your browser and go to: **http://localhost:5000** 60 | 61 | --- 62 | 63 | ## 🎛️ Control Panel Features 64 | 65 | The web control panel provides everything you need: 66 | 67 | ### **Dashboard** 68 | - Quick overview and navigation 69 | - System status monitoring 70 | 71 | ### **Script Builder** 72 | - Topic and keyword input 73 | - Document upload support (PDF, DOCX, TXT) 74 | - Web search integration (Google/Brave APIs) 75 | - AI model selection 76 | - Real-time progress streaming 77 | 78 | ### **Podcast Builder** 79 | - Script selection from generated scripts 80 | - Voice configuration (host/guest) 81 | - Audio and video settings 82 | - Development mode for segment review 83 | 84 | ### **Settings** 85 | - API key management (OpenAI, Google, Brave, etc.) 86 | - LLM model configuration 87 | - Voice profiles and audio processing 88 | 89 | ### **History** 90 | - Browse generated scripts and videos 91 | - Download completed podcasts 92 | - Archive management 93 | 94 | ### **Docker Management** 95 | - Start/stop Orpheus TTS services 96 | - Container status monitoring 97 | - Automated setup 98 | 99 | --- 100 | 101 | ## 📋 Prerequisites 102 | 103 | - Linux OS (Ubuntu/Debian recommended) 104 | - Git, Python 3.8+, Docker, FFmpeg 105 | - NVIDIA GPU with Container Toolkit (recommended for TTS) 106 | 107 | The installer handles most dependencies automatically. 108 | 109 | --- 110 | 111 | ## 🎯 Workflow 112 | 113 | 1. **Configure Settings:** Add your API keys and select LLM models 114 | 2. **Generate Script:** Enter topic/keywords or upload documents 115 | 3. **Create Podcast:** Select script, choose voices, generate video 116 | 4. **Download:** Access your completed podcast from the History page 117 | 118 | --- 119 | 120 | ## 🎬 Examples 121 | 122 | * **Mabinogi Reforging Guide:** 123 | * [![YouTube](https://img.youtube.com/vi/gHvIbpv95iQ/0.jpg)](https://youtu.be/gHvIbpv95iQ?si=yjsy_GlQMz_QKqHH) 124 | * **Dundell's Cyberspace - What are Game Emulators?:** 125 | * [![YouTube](https://img.youtube.com/vi/9pTBPMgRlBU/0.jpg)](https://youtu.be/zbZmEwGinoA?si=hSPlLnpuAsajUtsb) 126 | 127 | --- 128 | 129 | ## 🙏 Credits 130 | 131 | Built with [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI) for TTS and [Orpheus TTS](https://github.com/canopyai/Orpheus-TTS) model. 
132 | 133 | ## 📜 License 134 | 135 | Apache License 2.0 136 | -------------------------------------------------------------------------------- /functions/ai.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import os 5 | import random # Used for retry delay jitter 6 | 7 | from .utils import log_to_file, clean_thinking_tags # Import necessary functions from utils 8 | 9 | def call_ai_api(prompt, config, tool_name="General", timeout=300, retries=1, base_wait_time=60): 10 | """ 11 | Generic function to call the OpenAI-compatible API with retry logic. 12 | - Handles Timeouts and 429 Rate Limit errors with exponential backoff. 13 | """ 14 | print(f"\nSending {tool_name} request to AI...") 15 | log_to_file(f"Initiating API Call (Tool: {tool_name})") 16 | 17 | model_config = config.get("selected_model_config") 18 | if not model_config: 19 | final_model_key = config.get('final_model_key', 'N/A') 20 | print(f"Error: Selected model configuration ('{final_model_key}') not found. Cannot call API.") 21 | log_to_file(f"API Call Error: selected_model_config missing for key '{final_model_key}'.") 22 | return None, None 23 | 24 | api_key = model_config.get("api_key") 25 | api_endpoint = model_config.get("api_endpoint") 26 | if not api_key or not api_endpoint: 27 | final_model_key = config.get('final_model_key', 'N/A') 28 | print(f"Error: 'api_key' or 'api_endpoint' missing in config for '{final_model_key}'.") 29 | log_to_file(f"API Call Error: api_key or api_endpoint missing for model key '{final_model_key}'.") 30 | return None, None 31 | 32 | headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} 33 | payload = { 34 | "model": model_config.get("model"), 35 | "messages": [{"role": "user", "content": prompt}], 36 | } 37 | # Dynamically add optional parameters from config 38 | for param in ["temperature", "max_tokens", "top_p"]: 39 | if param in model_config: 40 | # Ensure correct type, e.g., float for temp, int for tokens 41 | try: 42 | if param == "temperature" or param == "top_p": 43 | payload[param] = float(model_config[param]) 44 | elif param == "max_tokens": 45 | payload[param] = int(model_config[param]) 46 | except (ValueError, TypeError): 47 | print(f"Warning: Could not convert '{param}' to the correct type. 
Using default.") 48 | log_to_file(f"Config Warning: Could not convert '{param}' value '{model_config[param]}'.") 49 | 50 | 51 | if not payload.get("model"): 52 | print(f"Error: 'model' key is missing in the final payload for config '{config.get('DEFAULT_MODEL_CONFIG')}'.") 53 | log_to_file("API Call Error: 'model' key missing in payload.") 54 | return None, None 55 | 56 | log_to_file(f"API Call Details:\nEndpoint: {api_endpoint}\nPayload: {json.dumps(payload, indent=2)}") 57 | full_api_url = api_endpoint.rstrip('/') + "/chat/completions" 58 | 59 | for attempt in range(retries + 1): 60 | try: 61 | response = requests.post(full_api_url, headers=headers, json=payload, timeout=timeout) 62 | response.raise_for_status() 63 | 64 | result = response.json() 65 | log_to_file(f"Raw API Response (Attempt {attempt + 1}):\n{json.dumps(result, indent=2)}") 66 | 67 | if not result.get("choices") or not result["choices"][0].get("message") or not result["choices"][0]["message"].get("content"): 68 | raise ValueError("Invalid response structure received from API.") 69 | 70 | print(f"{tool_name} response received.") 71 | message_content = result["choices"][0]["message"]["content"] 72 | cleaned_message = clean_thinking_tags(message_content) 73 | return message_content, cleaned_message 74 | 75 | except requests.exceptions.Timeout: 76 | error_msg = f"API call timed out after {timeout} seconds (Attempt {attempt + 1}/{retries + 1})." 77 | print(f"\n{tool_name} request failed (Timeout).") 78 | log_to_file(error_msg) 79 | if attempt >= retries: 80 | return None, None # Final attempt failed 81 | 82 | except requests.exceptions.HTTPError as e: 83 | error_msg = f"API call failed with HTTP {e.response.status_code} (Attempt {attempt + 1}/{retries + 1}): {e}" 84 | print(f"\n{tool_name} request failed ({e.response.status_code}).") 85 | log_to_file(error_msg) 86 | if e.response.status_code != 429 or attempt >= retries: 87 | return None, None # Fail on non-429 errors or if retries are exhausted 88 | 89 | except (requests.exceptions.RequestException, ValueError, KeyError, IndexError) as e: 90 | error_msg = f"An error occurred during API call or response parsing (Attempt {attempt + 1}/{retries + 1}): {e}" 91 | print(f"\n{tool_name} request failed.") 92 | log_to_file(f"{error_msg}\nRaw Response (if available):\n{locals().get('response', 'N/A')}") 93 | if attempt >= retries: 94 | return None, None # Final attempt failed 95 | 96 | # If we are going to retry, calculate wait time and log it 97 | if attempt < retries: 98 | wait_time = base_wait_time * (2 ** attempt) + random.uniform(0, 1) # Exponential backoff with jitter 99 | print(f"Waiting for {wait_time:.2f} seconds before retrying...") 100 | log_to_file(f"Retrying after {wait_time:.2f} seconds.") 101 | time.sleep(wait_time) 102 | 103 | return None, None # Should be unreachable, but as a fallback -------------------------------------------------------------------------------- /installation_readme.md: -------------------------------------------------------------------------------- 1 | # Analysis of orpheus_Installer.sh 2 | 3 | This document outlines the components, prerequisites, and setup steps performed or guided by the `orpheus_Installer.sh` script. 4 | 5 | ## 1. System Program Prerequisites (Checks) 6 | 7 | The script checks if the following command-line programs are installed before proceeding. If not found, it typically suggests an installation command (often using `apt`) or provides a link to installation instructions. 
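The installer implements these checks in bash (essentially one `command -v <tool>` test per program). Purely as an illustration of the same logic — this Python snippet is not part of the installer — the checks amount to:

```python
import shutil

# Tools the installer looks for before proceeding (see the list below).
REQUIRED_TOOLS = ["git", "docker", "python3", "pip3", "ffmpeg"]

def missing_prerequisites(tools=REQUIRED_TOOLS):
    """Return the required command-line tools that are not found on PATH."""
    return [tool for tool in tools if shutil.which(tool) is None]

for tool in missing_prerequisites():
    print(f"Missing prerequisite: {tool} - install it with your package manager (e.g. apt).")
```

The individual programs the script checks for, and why each is needed, are listed below.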
8 | 9 | * **`git`**: Required for cloning the necessary GitHub repository. 10 | * **`docker`**: Required for running the core application components in containers. 11 | * **`docker-compose`** (or `docker compose`): Required for orchestrating the Docker containers defined in the compose file. 12 | * **`python3`**: The Python 3 interpreter needed for the host virtual environment setup and potentially running helper scripts. (Checked using `command -v python3`). 13 | * **`pip3`**: The Python package installer for Python 3, used within the host virtual environment. (Checked using `command -v pip3`). 14 | * **`ffmpeg`**: A multimedia framework, likely needed for audio processing by the TTS system or related scripts. 15 | 16 | 17 | ### Optional Automatic Dependency Installation 18 | 19 | * The script attempts to detect your Linux distribution (Debian/Ubuntu, Arch, Fedora/RHEL, openSUSE families, and derivatives like Mint, Pop!_OS, EndeavourOS, etc. are supported). 20 | * If detected, it will list recommended system libraries (like `ffmpeg`, `python3-tk`, etc.) needed for the *host* Python scripts. 21 | * It will then ask if you want to attempt installing these using the system's package manager (`apt`, `pacman`, `dnf`/`yum`) via `sudo`. 22 | * Answering 'y' (Yes) will trigger the installation attempt. Answering 'n' (No) or pressing Enter (default) will skip this step, requiring manual installation if needed. 23 | 24 | ### Optional/Recommended System Libraries (Warnings) 25 | 26 | The script warns that the *host* Python scripts (`mainv3.py`, `orpheus_tts.py`) might require additional system libraries, suggesting installation commands: 27 | 28 | * `python3-tk` (for Tkinter GUI elements) 29 | * `libsndfile1` (for audio file handling) 30 | * `portaudio19-dev` (for audio I/O) 31 | * Selenium WebDriver (requires a browser like Chrome and its corresponding `chromedriver` in the system PATH) - *Note: The script attempts to install these.* 32 | 33 | ### GPU-Specific Checks 34 | 35 | * **`nvidia-smi`**: Used to detect if an NVIDIA GPU is present. 36 | * **`nvidia-container-runtime` / `nvidia-container-toolkit`**: Checks for the necessary toolkit to allow Docker containers to access the NVIDIA GPU. 37 | 38 | ## 2. GitHub Projects 39 | 40 | The script clones or updates the following repository: 41 | 42 | * **Repository:** `https://github.com/Lex-au/Orpheus-FastAPI.git` 43 | * **Destination:** Cloned into the `Orpheus-FastAPI` subdirectory within the user-specified installation directory (default: `orpheus_tts_setup`). 44 | 45 | ## 3. Python Virtual Environment (`venv`) 46 | 47 | The script sets up a dedicated Python virtual environment for host-level scripts: 48 | 49 | * **Type:** Standard Python `venv`. 50 | * **Name:** `host_venv` 51 | * **Location:** Created inside the main installation directory (e.g., `orpheus_tts_setup/host_venv`). 52 | * **Purpose:** To install Python dependencies for `mainv3.py` and `orpheus_tts.py` without interfering with the system's global Python environment. 53 | 54 | ## 4. Pip Packages 55 | 56 | Python packages are installed using `pip3` *within* the `host_venv` virtual environment: 57 | 58 | * **Source:** Packages listed in the `requirements_host.txt` file (expected to be in the directory *parent* to the installation directory). 59 | * **Action:** `pip3 install -r ../requirements_host.txt` is executed within the activated `host_venv`. 
60 | * **Specific Packages:** The exact packages depend on the contents of `requirements_host.txt` (not included in the installer script itself). 61 | * **NLTK Data:** Downloads the 'punkt' tokenizer data (`python3 -m nltk.downloader punkt`) required by the NLTK library (which is presumably listed in `requirements_host.txt`). 62 | * **Pip Upgrade:** Upgrades `pip` itself within the `host_venv`. 63 | 64 | ## 5. Docker Components (User Action Required Post-Script) 65 | 66 | The script itself **does not** build or run Docker containers. It prepares the necessary files and instructs the user on how to start the services using Docker Compose. 67 | 68 | * **Configuration File:** The primary file used is `docker-compose-gpu.yml` located inside the cloned `Orpheus-FastAPI` directory. 69 | * **User Command:** The user is instructed to run `docker compose -f docker-compose-gpu.yml up` (or `docker-compose ...` for V1 syntax). 70 | * **Expected Services (Based on Compose File):** 71 | * A FastAPI web application container (likely built from `Dockerfile.gpu` or `Dockerfile.cpu` in the `Orpheus-FastAPI` repo). 72 | * A `llama.cpp` server container (likely pulled from a registry or built, responsible for running the GGUF model). 73 | * **Model Management:** The script defines the URL and filename for the `Orpheus-3b-FT-Q8_0.gguf` model, but the download is likely handled *within* the Docker environment orchestrated by the compose file, not directly by the installer script. The script notes the model is "managed by Docker Compose". 74 | * **GPU vs CPU:** The script checks for GPU capabilities and defaults to instructing the user to use `docker-compose-gpu.yml`. It warns that a CPU-specific compose file might be needed if no GPU is available or configured correctly with the NVIDIA Container Toolkit. 75 | 76 | ## 6. Conda Environments 77 | 78 | The script does **not** use or create any Conda environments. 79 | -------------------------------------------------------------------------------- /functions/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import json # Although json is not directly used in these functions, it's used in call_ai_api which relies on config. 4 | from dotenv import load_dotenv 5 | from .utils import log_to_file # Import log_to_file from the new utils module 6 | 7 | # Define script directory and LLM directory relative to this new structure 8 | # Assuming the new_script_builder.py will be in new_style/ 9 | # And settings are still relative to the original Ecne-AI-Podcaster directory. 10 | # We need to adjust paths accordingly. 11 | # The original SCRIPT_DIR was os.path.dirname(__file__) from Ecne-AI-Podcaster/script_builder.py 12 | # The new script will be in new_style/, so relative paths need adjustment. 
13 | # Let's assume the settings directory remains relative to the project base 14 | NEW_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) # Directory of this file (e.g., .../Ecne-AI-Podcasterv2/functions) 15 | # Go up one level from the functions directory to reach the project root (e.g., .../Ecne-AI-Podcasterv2) 16 | PROJECT_BASE_DIR = os.path.abspath(os.path.join(NEW_SCRIPT_DIR, '..')) 17 | LLM_DIR = os.path.join(PROJECT_BASE_DIR, "settings/llm_settings") 18 | 19 | 20 | def load_config(): 21 | """Loads configuration from .env file and ai_models.yml.""" 22 | load_dotenv() 23 | config = { 24 | # API endpoint and key are now loaded from ai_models.yml based on selection 25 | "google_api_key": os.getenv("GOOGLE_API_KEY"), 26 | "google_cse_id": os.getenv("GOOGLE_CSE_ID"), 27 | "brave_api_key": os.getenv("BRAVE_API_KEY"), 28 | # Reddit keys are loaded but unused in current scraping logic 29 | "reddit_client_id": os.getenv("REDDIT_CLIENT_ID"), 30 | "reddit_client_secret": os.getenv("REDDIT_CLIENT_SECRET"), 31 | "reddit_user_agent": os.getenv("REDDIT_USER_AGENT"), 32 | } 33 | 34 | # --- Load Model Configurations --- 35 | models_config_path = os.path.join(LLM_DIR, 'ai_models.yml') 36 | try: 37 | with open(models_config_path, 'r', encoding='utf-8') as f: 38 | models_config = yaml.safe_load(f) 39 | if not models_config or not isinstance(models_config, dict): 40 | raise ValueError("ai_models.yml is empty or not a valid dictionary.") 41 | print(f"Loaded model configurations from {models_config_path}") 42 | log_to_file(f"Loaded model configurations from {models_config_path}") 43 | except FileNotFoundError: 44 | print(f"Error: Model configuration file not found at {models_config_path}") 45 | log_to_file(f"Error: Model configuration file not found at {models_config_path}") 46 | # In a modular structure, we might return None or raise a specific error 47 | # For now, maintaining original behavior of exiting 48 | exit(1) 49 | except (yaml.YAMLError, ValueError) as e: 50 | print(f"Error parsing model configuration file {models_config_path}: {e}") 51 | log_to_file(f"Error parsing model configuration file {models_config_path}: {e}") 52 | exit(1) 53 | 54 | # NOTE: Model selection logic moved to main() after args parsing 55 | 56 | # Basic validation 57 | # Check search APIs 58 | google_ok = config.get("google_api_key") and config.get("google_cse_id") 59 | brave_ok = config.get("brave_api_key") 60 | if not google_ok and not brave_ok: 61 | print("Warning: Neither Google (API Key + CSE ID) nor Brave API Key are set. Web search will fail.") 62 | log_to_file("Warning: Neither Google (API Key + CSE ID) nor Brave API Key are set. Web search will fail.") 63 | 64 | # Check Reddit API creds 65 | reddit_ok = all(config.get(k) for k in ["reddit_client_id", "reddit_client_secret", "reddit_user_agent"]) 66 | if not reddit_ok: 67 | print("Warning: Reddit credentials (client_id, client_secret, user_agent) missing in .env. Reddit scraping via PRAW will fail.") 68 | log_to_file("Warning: Reddit credentials (client_id, client_secret, user_agent) missing in .env. 
Reddit scraping via PRAW will fail.") 69 | 70 | print("Configuration loaded.") 71 | log_to_file("Configuration loaded successfully.") 72 | # Return both basic config and the loaded models dictionary 73 | return config, models_config 74 | 75 | def load_character_profile(filepath): 76 | """Loads character profile from a YAML file.""" 77 | try: 78 | print(f"Loading character profile from {filepath}") 79 | log_to_file(f"Attempting to load character profile from {filepath}") 80 | with open(filepath, 'r', encoding='utf-8') as f: 81 | profile = yaml.safe_load(f) 82 | # Ensure podcast name is loaded if present 83 | if 'podcast_name' not in profile: 84 | print(f"Warning: 'podcast_name' not found in profile {filepath}. Using default.") 85 | log_to_file(f"Warning: 'podcast_name' not found in profile {filepath}. Using default.") 86 | profile.setdefault('podcast_name', 'Podcast') # Default if missing 87 | print(f"Loaded character profile from {filepath}") 88 | log_to_file(f"Successfully loaded character profile from {filepath}") 89 | return profile 90 | except FileNotFoundError: 91 | print(f"Error: Character profile file not found at {filepath}") 92 | log_to_file(f"Error: Character profile file not found at {filepath}") 93 | return None 94 | except yaml.YAMLError as e: 95 | print(f"Error parsing YAML file {filepath}: {e}") 96 | log_to_file(f"Error parsing YAML file {filepath}: {e}") 97 | return None 98 | except Exception as e: 99 | print(f"An unexpected error occurred loading {filepath}: {e}") 100 | log_to_file(f"An unexpected error occurred loading {filepath}: {e}") 101 | return None -------------------------------------------------------------------------------- /functions/search/discovery.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | import re 5 | 6 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 7 | from ..utils import log_to_file, parse_ai_tool_response, USER_AGENTS # Import utilities 8 | 9 | def discover_sources(keywords_list, config, args): # Added args parameter 10 | """Uses AI to discover relevant websites and subreddits.""" 11 | print("\nDiscovering sources via AI...") 12 | log_to_file("Starting source discovery phase.") 13 | # Use the first keyword/phrase for simplicity, or combine them 14 | discovery_keyword_str = " | ".join(keywords_list) 15 | print(f"Using keywords for discovery: '{discovery_keyword_str}'") 16 | log_to_file(f"Keywords for discovery: '{discovery_keyword_str}'") 17 | 18 | prompt = ( 19 | f"Based on the keywords '{discovery_keyword_str}', suggest relevant information sources. " 20 | f"Include specific websites (news sites, reputable blogs, official project sites) and relevant subreddits. 
" 21 | f"Prioritize sources known for reliable, detailed information on this topic.\n" 22 | f"Format your response strictly within tags, listing each source URL or subreddit name (e.g., 'r/technology' or 'techcrunch.com') on a new line.\n" 23 | f"Example:\n\ntechcrunch.com\nwired.com\nexampleblog.net/relevant-section\nr/artificial\nr/machinelearning\n" 24 | ) 25 | 26 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name="SourceDiscovery", timeout=args.ai_timeout, retries=args.ai_retries) 27 | 28 | if not cleaned_response: 29 | log_to_file("Error: No response received from AI API for source discovery.") 30 | print("\nError: No response received from AI API for source discovery.") 31 | return [] 32 | 33 | sources_str = parse_ai_tool_response(cleaned_response, "toolWebsites") 34 | 35 | if not sources_str or sources_str == cleaned_response: # Parsing failed or tag missing 36 | log_to_file("Error: Could not parse tag in source discovery response.") 37 | print("\nError: Could not parse tag in source discovery response.") 38 | return [] 39 | 40 | # Remove trailing parenthetical explanations before validation 41 | sources_list_raw = [line.strip() for line in sources_str.split('\n') if line.strip()] 42 | sources_list = [] 43 | for line in sources_list_raw: 44 | # Remove ' (explanation...)' from the end of the line 45 | cleaned_line = re.sub(r'\s*\(.*\)\s*$', '', line).strip() 46 | if cleaned_line: 47 | # Handle domain names without protocol 48 | if '.' in cleaned_line and not cleaned_line.startswith(('http://', 'https://', 'r/')): 49 | cleaned_line = f"https://{cleaned_line}" 50 | # Add if it's a valid URL or reddit source 51 | if cleaned_line.startswith(('http://', 'https://', 'r/')): 52 | sources_list.append(cleaned_line) 53 | 54 | if not sources_list: 55 | log_to_file(f"Warning: No valid sources extracted after parsing.\nParsed content: {sources_str}") 56 | print("\nWarning: No valid sources extracted after parsing.") 57 | return [] 58 | 59 | print(f"Discovered {len(sources_list)} potential sources.") 60 | log_to_file(f"Discovered {len(sources_list)} potential sources.") 61 | 62 | # --- Add Source Validation --- 63 | validated_sources = [] 64 | print("Validating sources...") 65 | log_to_file("Validating discovered sources.") 66 | for source in sources_list: 67 | is_valid = False 68 | print(f" - Checking: {source}...", end="") 69 | try: 70 | if source.startswith('r/'): # Assume subreddit exists if AI suggested 71 | is_valid = True 72 | print(" OK (Subreddit)") 73 | else: # Check website accessibility 74 | # Prepend http:// if no scheme exists 75 | url_to_check = source if source.startswith(('http://', 'https://')) else f'http://{source}' 76 | # Use HEAD request for efficiency 77 | response = requests.head(url_to_check, headers={'User-Agent': random.choice(USER_AGENTS)}, timeout=10, allow_redirects=True) 78 | if response.status_code < 400: # OK or Redirect 79 | is_valid = True 80 | print(f" OK (Status: {response.status_code})") 81 | else: 82 | print(f" Failed (Status: {response.status_code})") 83 | except requests.exceptions.RequestException as e: 84 | print(f" Failed (Error: {e})") 85 | log_to_file(f"Source validation failed for {source}: {e}") 86 | except Exception as e: 87 | print(f" Failed (Unexpected Error: {e})") 88 | log_to_file(f"Source validation failed for {source} (Unexpected): {e}") 89 | 90 | if is_valid: 91 | validated_sources.append(source) 92 | time.sleep(0.5) # Small delay between checks 93 | 94 | print(f"Validated {len(validated_sources)} sources: 
{validated_sources}") 95 | log_to_file(f"Validated {len(validated_sources)} sources: {validated_sources}") 96 | 97 | # --- Filter Reddit sources if --no-reddit is specified --- 98 | if args.no_reddit: 99 | non_reddit_sources = [src for src in validated_sources if not (src.startswith('r/') or 'reddit.com/r/' in src)] 100 | print(f"Filtering Reddit sources due to --no-reddit flag. Using {len(non_reddit_sources)} non-Reddit sources.") 101 | log_to_file(f"Source Discovery: Filtered out Reddit sources. Using {len(non_reddit_sources)} sources: {non_reddit_sources}") 102 | return non_reddit_sources 103 | else: 104 | return validated_sources -------------------------------------------------------------------------------- /functions/tts/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | # Define Languages and Voices (copied from orpheus_tts.py for now, will be removed if centralized) 5 | LANGUAGES_VOICES = { 6 | 'English': ['tara', 'leah', 'jess', 'leo', 'dan', 'mia', 'zac', 'zoe'], 7 | 'French': ['pierre', 'amelie', 'marie'], 8 | 'German': ['jana', 'thomas', 'max'], 9 | 'Korean': ['유나', '준서'], 10 | 'Hindi': ['ऋतिका'], 11 | 'Mandarin': ['长乐', '白芷'], 12 | 'Spanish': ['javi', 'sergio', 'maria'], 13 | 'Italian': ['pietro', 'giulia', 'carlo'] 14 | } 15 | LANGUAGES = list(LANGUAGES_VOICES.keys()) 16 | ALL_VOICES = [voice for lang_voices in LANGUAGES_VOICES.values() for voice in lang_voices] 17 | 18 | def parse_tts_arguments(): 19 | """ 20 | Parses command-line arguments specific to the TTS builder. 21 | """ 22 | parser = argparse.ArgumentParser( 23 | description="Generate speech from text or a script file using Orpheus TTS FastAPI endpoint.", 24 | epilog="Examples:\n" 25 | " Single sentence: python3 tts_builder.py --input \"Hello there.\" --voice leo --output single\n" 26 | " From script: python3 tts_builder.py --script podcast.txt --host-voice leo --guest-voice tara --output podcast_audio.wav --silence 0.5\n" 27 | " Dev Mode: python3 tts_builder.py --script podcast.txt --dev --output dev_test.wav --silence 0.5\n" 28 | " Expanded: python3 tts_builder.py --script podcast_script_small.txt --host-voice leo --guest-voice tara --output simple_test_script --dev --guest-breakup --video-resolution \"1920x1080\" --video-fps 24 --video-intermediate-preset slow --video-intermediate-crf 18 --video-final-audio-bitrate 320k", 29 | formatter_class=argparse.RawDescriptionHelpFormatter 30 | ) 31 | 32 | # --- Input Arguments (Mutually Exclusive) --- 33 | group = parser.add_mutually_exclusive_group(required=True) 34 | group.add_argument('--input', type=str, help='Single text input to synthesize.') 35 | group.add_argument('--script', type=str, help='Path to a script file (.txt) with lines like "Speaker: Dialogue".') 36 | group.add_argument('--resume-from-json', type=str, help='Path to a podcast JSON file to resume editing.') 37 | 38 | # --- Script Specific Arguments --- 39 | parser.add_argument('--host-voice', type=str, default='leo', 40 | help='Voice to use for lines starting with "Host:" (script mode only, default: leo).') 41 | parser.add_argument('--guest-voice', type=str, default='tara', 42 | help='Voice to use for lines starting with "Guest:" (script mode only, default: tara).') 43 | parser.add_argument('--silence', type=float, default=1.0, 44 | help='Duration of silence in seconds between script lines (default: 1.0). 
Use 0 to disable.') 45 | 46 | # --- General Arguments --- 47 | parser.add_argument('--voice', type=str, default='tara', 48 | help='Voice to use for single --input (default: tara).') 49 | parser.add_argument('--speed', type=float, default=1.0, 50 | help='Speech speed factor (0.5 to 1.5, default: 1.0).') 51 | parser.add_argument('--port', type=int, default=5005, 52 | help='Port the Orpheus-FastAPI server is running on (default: 5005).') 53 | parser.add_argument('--api-host', type=str, default='127.0.0.1', 54 | help='Host the Orpheus-FastAPI server is running on (default: 127.0.0.1).') 55 | parser.add_argument('--output', type=str, default='output_speech.wav', 56 | help='Output filename for the generated audio (default: output_speech.wav).') 57 | parser.add_argument('--dev', action='store_true', 58 | help='Enable development mode: launch GUI to review/redo segments before finalizing.') 59 | parser.add_argument('--guest-breakup', action='store_true', 60 | help='Break Guest dialogue into sentences for separate TTS processing.') 61 | parser.add_argument('--tts-max-retries', type=int, default=3, 62 | help='Maximum number of retry attempts for failed TTS requests (default: 3).') 63 | parser.add_argument('--tts-timeout', type=int, default=180, 64 | help='Timeout in seconds for each TTS request (default: 180).') 65 | 66 | # --- Video Generation Arguments (used when --dev is enabled) --- 67 | video_group = parser.add_argument_group('Video Generation Options (--dev mode only)') 68 | video_group.add_argument('--video-resolution', type=str, default="1280x720", 69 | help='Video resolution (e.g., "1920x1080"). Default determined by first background, fallback to this.') 70 | video_group.add_argument('--video-fps', type=int, default=24, 71 | help='Frames per second for the output video.') 72 | video_group.add_argument('--video-character-scale', type=float, default=1.0, 73 | help='Scale factor for character images in the video.') 74 | video_group.add_argument('--video-fade', type=float, default=1.0, 75 | help='Video fade duration for intro/outro segments.') 76 | video_group.add_argument('--video-intermediate-preset', default='medium', 77 | help='Encoding preset for intermediate video segments (e.g., ultrafast, medium, slow).') 78 | video_group.add_argument('--video-intermediate-crf', type=int, default=23, 79 | help='CRF value for intermediate video segments (0-51, lower is better quality).') 80 | video_group.add_argument('--video-final-audio-bitrate', default='192k', 81 | help='Bitrate for final AAC audio encoding (e.g., 128k, 192k).') 82 | video_group.add_argument('--video-workers', type=int, default=None, 83 | help='Number of worker processes for video generation. 
Defaults to CPU count.') 84 | video_group.add_argument('--video-keep-temp', action='store_true', 85 | help='Keep temporary video segment files after completion.') 86 | 87 | return parser.parse_args() 88 | 89 | if __name__ == '__main__': 90 | args = parse_tts_arguments() 91 | print("Parsed Arguments:") 92 | for arg, value in vars(args).items(): 93 | print(f" {arg}: {value}") -------------------------------------------------------------------------------- /functions/search/api.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | import datetime 5 | import json 6 | import urllib.parse 7 | 8 | from ..utils import log_to_file, USER_AGENTS # Import utilities from functions.utils 9 | 10 | # --- Search API Functions --- 11 | 12 | def search_google_api(query, config, num_results, from_date=None, to_date=None): 13 | """Performs search using Google Custom Search API.""" 14 | urls = [] 15 | api_key = config.get("google_api_key") 16 | cse_id = config.get("google_cse_id") 17 | if not api_key or not cse_id: 18 | log_to_file("Google API search skipped: API Key or CSE ID missing.") 19 | return None 20 | 21 | search_url = "https://www.googleapis.com/customsearch/v1" 22 | # Add date ranges using Google's `sort=date:r:YYYYMMDD:YYYYMMDD` parameter 23 | date_restrict = "" 24 | if from_date: 25 | try: 26 | from_dt_str = datetime.datetime.strptime(from_date, '%Y-%m-%d').strftime('%Y%m%d') 27 | to_dt_str = datetime.datetime.strptime(to_date, '%Y-%m-%d').strftime('%Y%m%d') if to_date else datetime.datetime.now().strftime('%Y%m%d') 28 | date_restrict = f"date:r:{from_dt_str}:{to_dt_str}" 29 | except ValueError: 30 | print(f" - Warning: Invalid date format for Google search '{from_date}' or '{to_date}'. Ignoring date range.") 31 | log_to_file(f"Google API Warning: Invalid date format '{from_date}'/'{to_date}'. Ignoring date range.") 32 | 33 | print(f" - Searching Google API: '{query}' (Num: {num_results}, Date: '{date_restrict or 'None'}')") 34 | log_to_file(f"Google API Search: Query='{query}', Num={num_results}, DateRestrict='{date_restrict}'") 35 | 36 | params = {'key': api_key, 'cx': cse_id, 'q': query, 'num': min(num_results, 10)} 37 | if date_restrict: 38 | params['sort'] = date_restrict # Add sort parameter for date range 39 | 40 | try: 41 | response = requests.get(search_url, params=params, timeout=20) 42 | response.raise_for_status() 43 | search_data = response.json() 44 | 45 | if 'items' in search_data: 46 | urls = [item['link'] for item in search_data['items'] if 'link' in item] 47 | print(f" - Google Found: {len(urls)} results.") 48 | log_to_file(f"Google API Success: Found {len(urls)} URLs.") 49 | else: 50 | print(" - Google Found: 0 results.") 51 | log_to_file("Google API Success: No items found in response.") 52 | 53 | if 'error' in search_data and search_data['error'].get('code') == 429: 54 | print(" - !! Google API Quota limit likely reached !!") 55 | log_to_file("Google API Error: Quota limit reached (429 in response body).") 56 | return 'quota_error' 57 | return urls 58 | 59 | except requests.exceptions.HTTPError as e: 60 | print(f" - Error calling Google API: {e}") 61 | log_to_file(f"Google API HTTP Error: {e}") 62 | if e.response.status_code == 429: 63 | print(" - !! 
Google API Quota limit likely reached (HTTP 429) !!") 64 | log_to_file("Google API Error: Quota limit reached (HTTP 429).") 65 | return 'quota_error' 66 | return None 67 | except requests.exceptions.RequestException as e: 68 | print(f" - Error calling Google API: {e}") 69 | log_to_file(f"Google API Request Error: {e}") 70 | return None 71 | except Exception as e: 72 | print(f" - Unexpected error during Google API search: {e}") 73 | log_to_file(f"Google API Unexpected Error: {e}") 74 | return None 75 | finally: 76 | time.sleep(random.uniform(1, 2)) # Delay 77 | 78 | def search_brave_api(query, config, num_results, from_date=None, to_date=None): 79 | """Performs search using Brave Search API.""" 80 | urls = [] 81 | api_key = config.get("brave_api_key") 82 | if not api_key: 83 | log_to_file("Brave API search skipped: API Key missing.") 84 | return None 85 | 86 | search_url = "https://api.search.brave.com/res/v1/web/search" 87 | headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": api_key} 88 | freshness_param = None 89 | 90 | if from_date: 91 | try: 92 | from_dt = datetime.datetime.strptime(from_date, '%Y-%m-%d') 93 | freshness_start = from_dt.strftime('%Y%m%d') 94 | freshness_end = "" 95 | if to_date: 96 | to_dt = datetime.datetime.strptime(to_date, '%Y-%m-%d') 97 | freshness_end = to_dt.strftime('%Y%m%d') 98 | freshness_param = f"pd:{freshness_start},{freshness_end}" 99 | except ValueError: 100 | print(f" - Warning: Invalid date format for Brave freshness '{from_date}' or '{to_date}'. Skipping date filter.") 101 | log_to_file(f"Brave API Warning: Invalid date format '{from_date}'/'{to_date}' for freshness.") 102 | 103 | print(f" - Searching Brave API: '{query}' (Num: {num_results}, Freshness: '{freshness_param or 'None'}')") 104 | log_to_file(f"Brave API Search: Query='{query}', Num={num_results}, Freshness='{freshness_param}'") 105 | 106 | params = {'q': query, 'count': num_results} 107 | if freshness_param: params['freshness'] = freshness_param 108 | 109 | try: 110 | # Log the exact request details before sending 111 | prepared_request = requests.Request('GET', search_url, headers=headers, params=params).prepare() 112 | log_to_file(f"Brave API Request Details:\n URL: {prepared_request.url}\n Headers: {prepared_request.headers}") 113 | print(f" - Requesting URL: {prepared_request.url}") # Also print URL for easier debugging 114 | 115 | response = requests.get(search_url, headers=headers, params=params, timeout=20) 116 | response.raise_for_status() 117 | search_data = response.json() 118 | log_to_file(f"Brave API Raw Response Body:\n{json.dumps(search_data, indent=2)}") # Log the raw JSON response 119 | 120 | if 'web' in search_data and 'results' in search_data['web']: 121 | urls = [item['url'] for item in search_data['web']['results'] if 'url' in item] 122 | print(f" - Brave Found: {len(urls)} results.") 123 | log_to_file(f"Brave API Success: Found {len(urls)} URLs.") 124 | else: 125 | print(" - Brave Found: 0 results.") 126 | log_to_file(f"Brave API Success: No web/results found in response. Keys: {search_data.keys()}") 127 | return urls 128 | 129 | except requests.exceptions.HTTPError as e: 130 | print(f" - Error calling Brave API: {e}") 131 | log_to_file(f"Brave API HTTP Error: {e}") 132 | if e.response.status_code == 429: 133 | print(" - !! 
Brave API Quota limit likely reached (HTTP 429) !!") 134 | log_to_file("Brave API Error: Quota limit reached (HTTP 429).") 135 | return 'quota_error' 136 | return None 137 | except requests.exceptions.RequestException as e: 138 | print(f" - Error calling Brave API: {e}") 139 | log_to_file(f"Brave API Request Error: {e}") 140 | return None 141 | except Exception as e: 142 | print(f" - Unexpected error during Brave API search: {e}") 143 | log_to_file(f"Brave API Unexpected Error: {e}") 144 | return None 145 | finally: 146 | time.sleep(random.uniform(1, 2)) # Delay -------------------------------------------------------------------------------- /functions/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import yaml 4 | import datetime 5 | import urllib.parse 6 | 7 | # Define LLM_DIR relative to this new structure 8 | # Go up one level from functions, then into Ecne-AI-Podcaster, then settings/llm_settings 9 | NEW_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | ORIGINAL_BASE_DIR = os.path.abspath(os.path.join(NEW_SCRIPT_DIR, '..', '..', 'Ecne-AI-Podcaster')) 11 | LLM_DIR = os.path.join(ORIGINAL_BASE_DIR, "settings/llm_settings") 12 | 13 | 14 | def parse_arguments(): 15 | """Parses command-line arguments.""" 16 | parser = argparse.ArgumentParser(description="Generate an AI podcast episode.") 17 | 18 | # --- Load model keys dynamically for choices --- 19 | available_model_keys = [] 20 | models_config_path = os.path.join(LLM_DIR, 'ai_models.yml') 21 | 22 | try: 23 | with open(models_config_path, 'r', encoding='utf-8') as f: 24 | models_config = yaml.safe_load(f) 25 | if models_config and isinstance(models_config, dict): 26 | available_model_keys = list(models_config.keys()) 27 | else: 28 | print(f"Warning: Could not load valid model keys from {models_config_path}. --llm-model argument might fail.") 29 | except Exception as e: 30 | print(f"Warning: Error loading {models_config_path} for arg parsing: {e}. 
--llm-model argument might fail.") 31 | # --- End model key loading --- 32 | 33 | # --- Define Arguments --- 34 | # Core 35 | # Made keywords not required, will validate later based on --no-search 36 | parser.add_argument("--keywords", type=str, default=None, help="Comma-separated keywords/phrases for searching (required unless --no-search is used).") 37 | parser.add_argument("--topic", type=str, required=True, help="The main topic phrase for the podcast episode.") 38 | # AI Model Selection 39 | parser.add_argument("--llm-model", type=str, default=None, choices=available_model_keys if available_model_keys else None, 40 | help="Specify the LLM configuration key from ai_models.yml to use (overrides .env setting).") 41 | # Search & Scraping 42 | parser.add_argument("--api", choices=['google', 'brave'], default='google', help="Preferred search API ('google' or 'brave').") 43 | parser.add_argument("--from_date", type=str, default=None, help="Start date for search (YYYY-MM-DD).") 44 | parser.add_argument("--to_date", type=str, default=None, help="End date for search (YYYY-MM-DD).") 45 | parser.add_argument("--max-web-results", type=int, default=3, help="Max results per website source domain.") 46 | parser.add_argument("--max-reddit-results", type=int, default=5, help="Max *posts* to scrape per subreddit source.") 47 | parser.add_argument("--max-reddit-comments", type=int, default=5, help="Max *comments* to scrape per Reddit post.") 48 | parser.add_argument("--per-keyword-results", type=int, default=None, help="Web results per keyword (defaults to max-web-results).") 49 | parser.add_argument("--combine-keywords", action="store_true", help="Treat keywords as one search query (legacy).") 50 | # Output & Content 51 | parser.add_argument("--report", action="store_true", help="Generate a written report in addition to the script.") 52 | parser.add_argument("--youtube-description", action="store_true", help="Generate a YouTube description based on the report.") 53 | parser.add_argument("--score-threshold", type=int, default=5, help="Minimum summary score (0-10) to include in script.") 54 | parser.add_argument("--ai-timeout", type=int, default=120, help="Global timeout in seconds for all AI API calls.") 55 | parser.add_argument("--ai-retries", type=int, default=5, help="Global number of retries for all AI API calls.") 56 | parser.add_argument("--guidance", type=str, default=None, help="Additional guidance/instructions string for the LLM prompts.") 57 | parser.add_argument("--direct-articles", type=str, default=None, help="Path to a text file containing a list of article URLs (one per line) to scrape directly.") 58 | parser.add_argument("--no-search", action="store_true", help="Skip AI source discovery and web search APIs. 
Requires --direct-articles to be set.") 59 | # parser.add_argument("--sources", type=str, default=None, help="Comma-separated list of sources to use instead of AI discovery.") 60 | parser.add_argument("--reference-docs", type=str, default=None, help="Comma-separated paths to text files containing reference information.") 61 | parser.add_argument("--reference-docs-summarize", action="store_true", help="Summarize and score reference docs before including them.") 62 | parser.add_argument("--reference-docs-folder", type=str, default=None, help="Path to a folder containing reference documents (txt, pdf, docx).") 63 | parser.add_argument("--no-reddit", action="store_true", help="Exclude Reddit sources from discovery and scraping.") 64 | parser.add_argument("--single-speaker", action="store_true", help="Generate a single-speaker podcast script (Host only, no Guest).") 65 | 66 | args = parser.parse_args() 67 | 68 | # Set default for per_keyword_results 69 | if args.per_keyword_results is None: 70 | args.per_keyword_results = args.max_web_results 71 | 72 | # Process keywords only if provided 73 | search_queries = [] # Initialize default 74 | if args.keywords: 75 | if args.combine_keywords: 76 | raw_keywords = [k.strip() for k in args.keywords.split(',') if k.strip()] 77 | if not raw_keywords: raise ValueError("Please provide at least one keyword if using --keywords.") 78 | search_queries = [" ".join(raw_keywords)] 79 | print("Keywords combined into a single search query.") 80 | else: 81 | search_queries = [k.strip() for k in args.keywords.split(',') if k.strip()] 82 | if not search_queries: raise ValueError("Please provide at least one keyword/phrase if using --keywords.") 83 | print(f"Processing {len(search_queries)} separate search queries.") 84 | elif not args.no_search: # Keywords are required if we ARE doing a search 85 | parser.error("--keywords is required unless --no-search is specified.") 86 | 87 | # Validate dates 88 | def validate_date(date_str): 89 | if date_str is None: return None 90 | try: 91 | datetime.datetime.strptime(date_str, '%Y-%m-%d') 92 | return date_str 93 | except ValueError: 94 | raise ValueError(f"Invalid date format '{date_str}'. Use YYYY-MM-DD.") 95 | 96 | args.from_date = validate_date(args.from_date) 97 | args.to_date = validate_date(args.to_date) 98 | 99 | args.search_queries = search_queries # Store the processed list (or empty list) back into args 100 | print(f"Args: {vars(args)}") 101 | print(f"Parsed Args: {vars(args)}") # Keep print statement 102 | 103 | # Validation: --no-search requires --direct-articles OR reference docs/folder 104 | # Modified this validation slightly: If --no-search is used, *some* form of input context is needed. 105 | if args.no_search and not args.direct_articles and not args.reference_docs and not args.reference_docs_folder: 106 | parser.error("--no-search requires at least one of --direct-articles, --reference-docs, or --reference-docs-folder to be specified.") 107 | 108 | # Validation: Keywords are required if search is active 109 | if not args.no_search and not args.keywords: 110 | # This check is now done during keyword processing above, but double-checking here is safe. 111 | # parser.error("--keywords is required unless --no-search is specified.") 112 | # Re-checking the logic, the check during processing is sufficient. Removing redundant check here. 
113 |         pass # Validation moved to keyword processing block
114 | 
115 |     return args
116 | 
--------------------------------------------------------------------------------
/templates/settings.html:
--------------------------------------------------------------------------------
[Template text lost during extraction: only the text nodes of settings.html survive. Recoverable structure: a page titled "Settings - AI Podcast Generator" with a "Settings" header and a link back to the dashboard, an "API Keys (.env)" form section with several labeled key fields (presumably the Google, Brave, and Reddit credentials read by functions/config.py) and a save control, and an "LLM Settings (ai_models.yml)" section with a model-selection dropdown plus inline JavaScript (original template lines ~60-134) for loading and saving the values.]
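The settings page above edits the same .env keys and ai_models.yml file that functions/config.py consumes. A minimal sketch of how the configuration pieces shown earlier in this dump might be wired together follows; this is hypothetical wiring, not the project's actual entry point, and the selected-model merge step and package-style imports are assumptions.

from functions.args import parse_arguments
from functions.config import load_config, load_character_profile

# Hypothetical wiring, assuming the functions/ directory is importable as a package.
args = parse_arguments()                # parses --topic, --llm-model, etc. from the CLI
config, models_config = load_config()   # .env values plus the ai_models.yml dictionary

# Pick the model configuration chosen via --llm-model, or fall back to the first key.
selected_key = args.llm_model or next(iter(models_config))
config["selected_model"] = models_config[selected_key]   # assumed merge step, not shown in this excerpt

host_profile = load_character_profile("settings/characters/host.yml")
if host_profile:
    print(f"Model config '{selected_key}' for podcast '{host_profile['podcast_name']}'")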
139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /functions/tts/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import numpy as np 4 | import soundfile as sf 5 | import tempfile 6 | import shutil 7 | from scipy.signal import resample # For resampling in concatenate_wavs 8 | 9 | # Override print function to force immediate flushing for real-time output 10 | original_print = print 11 | def print(*args, **kwargs): 12 | kwargs.setdefault('flush', True) 13 | return original_print(*args, **kwargs) 14 | 15 | # Define VOICE_DIR relative to the project root, assuming functions/tts/utils.py is in functions/tts/ 16 | SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | PROJECT_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, '..', '..')) # Go up two levels from functions/tts/ 18 | VOICE_DIR = os.path.abspath(os.path.join(PROJECT_ROOT, "settings/voices")) 19 | os.makedirs(VOICE_DIR, exist_ok=True) # Ensure VOICE_DIR exists 20 | 21 | def load_voice_config(voice_name): 22 | """Loads voice configuration from YAML file, falling back to default.""" 23 | base_path = os.path.join(VOICE_DIR, f"{voice_name}.yaml") 24 | default_path = os.path.join(VOICE_DIR, "default.yaml") 25 | config_path = base_path if os.path.exists(base_path) else default_path 26 | 27 | # Hardcoded fallback defaults in case even default.yaml is missing/invalid 28 | hardcoded_defaults = { 29 | 'gain_factor': 1.0, 'trim_end_ms': 0, 'nr_level': 0, 30 | 'compress_thresh': 1.0, 'compress_ratio': 1, 'norm_frame_len': 10, 31 | 'norm_gauss_size': 3, 'deesser_freq': 3000 32 | } 33 | 34 | if not os.path.exists(config_path): 35 | print(f"!! Warning: Voice config not found for '{voice_name}' and default.yaml missing. Using hardcoded defaults.") 36 | return hardcoded_defaults 37 | 38 | try: 39 | with open(config_path, 'r') as f: 40 | config = yaml.safe_load(f) 41 | if config is None: # Handle empty YAML file 42 | print(f"!! Warning: Voice config file '{os.path.basename(config_path)}' is empty. Using hardcoded defaults.") 43 | return hardcoded_defaults 44 | print(f"-> Loaded voice config from: {os.path.basename(config_path)}") 45 | # Merge with hardcoded defaults to ensure all keys exist 46 | final_config = hardcoded_defaults.copy() 47 | final_config.update(config) 48 | return final_config 49 | except yaml.YAMLError as e: 50 | print(f"!! Error parsing voice config file '{os.path.basename(config_path)}': {e}. Using hardcoded defaults.") 51 | return hardcoded_defaults 52 | except Exception as e: 53 | print(f"!! Error loading voice config file '{os.path.basename(config_path)}': {e}. Using hardcoded defaults.") 54 | return hardcoded_defaults 55 | 56 | def generate_silence(duration_s, samplerate, temp_dir): 57 | """Generates a silence WAV file and returns its path.""" 58 | if not samplerate: 59 | print("!! Error: Samplerate required to generate silence. 
Skipping.") 60 | return None 61 | print(f"\nGenerating {duration_s}s silence segment (SR: {samplerate} Hz)...") 62 | num_samples = int(duration_s * samplerate) 63 | silence_data = np.zeros(num_samples, dtype=np.float32) 64 | 65 | # Use mkstemp for unique file name, ensuring temp_dir is used 66 | temp_fd, temp_path = tempfile.mkstemp(suffix="_silence.wav", prefix="silence_", dir=temp_dir) 67 | os.close(temp_fd) 68 | try: 69 | sf.write(temp_path, silence_data, samplerate, subtype='PCM_16') 70 | print(f"-> Silence saved to {os.path.basename(temp_path)}") 71 | return temp_path 72 | except Exception as e: 73 | print(f"!! Error generating silence file: {e}") 74 | if os.path.exists(temp_path): os.remove(temp_path) 75 | return None 76 | 77 | def concatenate_wavs(file_list, output_filename, target_samplerate): 78 | """Concatenates a list of WAV files into a single output file.""" 79 | if not file_list: 80 | print("!! Error: No segment files provided for concatenation.") 81 | return False 82 | 83 | valid_files = [f for f in file_list if f and os.path.exists(f)] 84 | if not valid_files: 85 | print("!! Error: No valid files found in the list for concatenation.") 86 | return False 87 | 88 | if not target_samplerate: 89 | print("!! Warning: Target samplerate not provided for concatenation.") 90 | # Attempt to get samplerate from the first valid file 91 | try: 92 | info = sf.info(valid_files[0]) 93 | target_samplerate = info.samplerate 94 | print(f"!! Using samplerate from first file ({os.path.basename(valid_files[0])}): {target_samplerate} Hz.") 95 | except Exception as e: 96 | print(f"!! Error: Could not determine target samplerate from first file: {e}") 97 | return False # Cannot proceed without a samplerate 98 | 99 | print(f"\nConcatenating {len(valid_files)} valid segments into {output_filename} (Target SR: {target_samplerate} Hz)...") 100 | output_data = [] 101 | target_channels = 1 # Assume mono 102 | 103 | for i, filepath in enumerate(valid_files): 104 | print(f"-> Processing file {i+1}/{len(valid_files)}: {os.path.basename(filepath)}") 105 | try: 106 | # Check samplerate and resample if needed 107 | info = sf.info(filepath) 108 | data, sr = sf.read(filepath, dtype='float32') 109 | 110 | if info.samplerate != target_samplerate: 111 | print(f"-> Resampling {os.path.basename(filepath)} from {info.samplerate} Hz to {target_samplerate} Hz...") 112 | # Calculate resampling ratio 113 | ratio = target_samplerate / info.samplerate 114 | n_samples = int(len(data) * ratio) 115 | 116 | # Use scipy's resample function for high-quality resampling 117 | data = resample(data, n_samples) 118 | print(f"-> Resampling complete. New length: {len(data)/target_samplerate:.2f}s") 119 | 120 | # Convert to mono if necessary 121 | if info.channels == 2: 122 | print(f"-> Converting {os.path.basename(filepath)} to mono.") 123 | data = np.mean(data, axis=1) 124 | elif info.channels != 1: 125 | print(f"!! Warning: Unexpected channel count ({info.channels}) in {os.path.basename(filepath)}. Attempting to process first channel.") 126 | # Attempt to take the first channel if more than 2? Or skip? Let's try taking first. 127 | if data.ndim > 1: data = data[:, 0] 128 | 129 | 130 | output_data.append(data) 131 | # Duration calculation here might be slightly off if we manipulated channels 132 | # Let's calculate duration based on output samples / target_sr 133 | print(f"-> Appended {os.path.basename(filepath)} ({len(data)/target_samplerate:.2f}s)") 134 | 135 | except Exception as e: 136 | print(f"!! 
Error reading/processing {os.path.basename(filepath)}: {e}") 137 | print("!! Skipping problematic file.") 138 | continue # Skip file on error 139 | 140 | if not output_data: 141 | print("!! No valid audio data to concatenate after processing.") 142 | return False 143 | 144 | print("Concatenating final audio data...") 145 | final_audio = np.concatenate(output_data) 146 | final_duration = len(final_audio) / target_samplerate 147 | print(f"Final audio length: {final_duration:.2f}s") 148 | 149 | try: 150 | print(f"Writing final audio to {output_filename}...") 151 | sf.write(output_filename, final_audio, target_samplerate, subtype='PCM_16') 152 | print(f"\n✅ Concatenated audio saved successfully to '{output_filename}' ({final_duration:.2f}s)") 153 | return True 154 | except Exception as e: 155 | print(f"!! Error writing final concatenated file '{output_filename}': {e}") 156 | return False -------------------------------------------------------------------------------- /functions/processing/summarization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 5 | from ..utils import log_to_file, clean_thinking_tags, parse_ai_tool_response, run_archive_dir # Import utilities including run_archive_dir 6 | 7 | def summarize_content(scraped_texts, reference_docs_content, topic, config, args): 8 | """ 9 | Uses AI to summarize scraped content and optionally reference documents, 10 | assigning a relevance score to each. 11 | """ 12 | content_to_process = [] 13 | # Add scraped texts with a type identifier 14 | for idx, text in enumerate(scraped_texts): 15 | content_to_process.append({"type": "scraped", "content": text, "source_index": idx + 1}) 16 | 17 | # Add reference docs if summarization is requested 18 | if args.reference_docs_summarize and reference_docs_content: 19 | print(f"Including {len(reference_docs_content)} reference documents in summarization.") 20 | log_to_file(f"Including {len(reference_docs_content)} reference documents in summarization.") 21 | for doc in reference_docs_content: 22 | content_to_process.append({"type": "reference", "content": doc["content"], "path": doc["path"]}) 23 | elif reference_docs_content: 24 | print(f"Skipping summarization for {len(reference_docs_content)} reference documents as --reference-docs-summarize is not set.") 25 | log_to_file(f"Skipping summarization for {len(reference_docs_content)} reference documents.") 26 | 27 | 28 | total_pieces = len(content_to_process) 29 | if total_pieces == 0: 30 | print("\nWarning: No content (scraped or reference for summarization) available to summarize.") 31 | log_to_file("Summarization Warning: No content found to process.") 32 | return [] # Return empty list if nothing to do 33 | 34 | print(f"\nSummarizing {total_pieces} content piece(s)...") 35 | log_to_file(f"Starting summarization for {total_pieces} piece(s). 
Topic: {topic}") 36 | summaries_with_scores = [] 37 | successful_summaries = 0 38 | 39 | for i, item in enumerate(content_to_process, 1): 40 | text = item["content"] 41 | item_type = item["type"] 42 | item_source_id = item.get("path", f"Scraped_{item.get('source_index', i)}") # Use path for ref docs, index for scraped 43 | 44 | if len(text) < 100: # Increased minimum length 45 | print(f"\rSkipping summary for short text piece {i}/{total_pieces} ({item_source_id}).", end='', flush=True) 46 | log_to_file(f"Summary {i}/{total_pieces} ({item_source_id}) skipped (too short: {len(text)} chars).") 47 | continue 48 | 49 | # Show progress 50 | print(f"\rSummarizing & Scoring {i}/{total_pieces} ({item_type}) (Completed: {successful_summaries})", end='', flush=True) 51 | 52 | # Limit text size sent to AI if necessary (check API limits) 53 | max_summary_input_chars = 150000 # Example limit, adjust as needed 54 | truncated_text = text[:max_summary_input_chars] 55 | if len(text) > max_summary_input_chars: 56 | log_to_file(f"Warning: Summary {i} ({item_source_id}) input text truncated to {max_summary_input_chars} chars.") 57 | 58 | guidance_text = f"\n**Additional Guidance:** {args.guidance}\n" if args.guidance else "" 59 | prompt = ( 60 | f"Please provide a concise yet comprehensive summary of the following text. Focus on the key information, main arguments, findings, and any specific data points (statistics, percentages, benchmark results, dates, names) relevant to the main topic.\n" 61 | f"**Main Topic:** {topic}{guidance_text}\n" 62 | f"**Text to Summarize:**\n---\n{truncated_text}\n---\n\n" 63 | f"**Instructions:**\n" 64 | f"1. Format your summary *only* within tags.\n" 65 | f"2. After the summary tag, provide a relevance score (integer 0-10) indicating how relevant the *summary* is to the Main Topic ('{topic}') and adheres to any Additional Guidance provided. Enclose the score *only* in tags.\n\n" 66 | f"**Example Response Structure:**\n" 67 | f"This is a concise summary preserving key details like a 95% accuracy rate achieved in 2023 according to Dr. 
Smith.\n" 68 | f"8" 69 | ) 70 | 71 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name=f"Summary_{i}_{item_type}", timeout=args.ai_timeout, retries=args.ai_retries) 72 | 73 | summary = "Error: Summarization Failed" 74 | score = -1 # Default score 75 | summary_details = {"type": item_type, "source_id": item_source_id} # Store type and source id 76 | 77 | if cleaned_response: 78 | parsed_summary = parse_ai_tool_response(cleaned_response, "toolScrapeSummary") 79 | # Check if parsing returned the whole response (tag missing) 80 | if parsed_summary == cleaned_response and '' not in cleaned_response: 81 | log_to_file(f"Error: Summary {i} ({item_source_id}) parsing failed - tag missing.") 82 | summary = f"Error: Could not parse summary {i} ({item_source_id}) ( tag missing)" 83 | elif not parsed_summary: 84 | log_to_file(f"Error: Summary {i} ({item_source_id}) parsing failed - No content found in tag.") 85 | summary = f"Error: Could not parse summary {i} ({item_source_id}) (empty tag)" 86 | else: 87 | summary = parsed_summary # Use parsed summary 88 | 89 | # Extract score robustly 90 | score_match = re.search(r'(\d{1,2})', cleaned_response, re.IGNORECASE) 91 | if score_match: 92 | try: 93 | parsed_score = int(score_match.group(1)) 94 | if 0 <= parsed_score <= 10: 95 | score = parsed_score 96 | successful_summaries += 1 # Count success only if score is valid 97 | else: 98 | log_to_file(f"Warning: Summary {i} ({item_source_id}) score '{parsed_score}' out of range (0-10). Using -1.") 99 | except ValueError: 100 | log_to_file(f"Warning: Could not parse summary {i} ({item_source_id}) score '{score_match.group(1)}'. Using -1.") 101 | else: 102 | log_to_file(f"Warning: Could not find/parse tag for summary {i} ({item_source_id}). Using -1.") 103 | 104 | else: # API call itself failed 105 | log_to_file(f"Error: API call failed for Summary_{i} ({item_source_id}). Raw response was empty.") 106 | summary = f"Error: Could not summarize text piece {i} ({item_source_id}) (API call failed or timed out)" 107 | 108 | # Add summary and score along with type and source identifier 109 | summary_details = {"type": item_type, "source_id": item_source_id, 'summary': summary, 'score': score} 110 | summaries_with_scores.append(summary_details) 111 | 112 | # Save the summary text to archive regardless of score validity 113 | if run_archive_dir: 114 | # Create a more descriptive filename 115 | safe_source_id = re.sub(r'[\\/*?:"<>|]', "_", str(item_source_id)) # Sanitize filename chars 116 | summary_filename = os.path.join(run_archive_dir, f"summary_{i}_{item_type}_{safe_source_id[:50]}.txt") # Truncate long paths 117 | try: 118 | with open(summary_filename, 'w', encoding='utf-8') as sf: 119 | sf.write(f"Source: {item_source_id}\nType: {item_type}\nScore: {score}\n\n{summary}") 120 | except IOError as e: 121 | log_to_file(f"Warning: Could not save summary {i} ({item_source_id}) to file {summary_filename}: {e}") 122 | 123 | 124 | # Final status update 125 | print(f"\rSummarization & Scoring complete. Generated {successful_summaries}/{total_pieces} summaries successfully (with valid scores).") 126 | log_to_file(f"Summarization phase complete. 
Successful summaries (with score): {successful_summaries}/{total_pieces}") 127 | return summaries_with_scores -------------------------------------------------------------------------------- /functions/processing/youtube_descriptor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json # Used for logging raw response 4 | 5 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 6 | from ..utils import log_to_file, clean_thinking_tags # Import utilities 7 | 8 | def generate_youtube_description(report_content, topic, config, args): 9 | """Uses AI to generate a YouTube-friendly description based on an existing report.""" 10 | # Access run_archive_dir from the global scope via utils 11 | from ..utils import run_archive_dir 12 | 13 | print("\nGenerating YouTube description via AI...") 14 | log_to_file(f"Starting YouTube description generation. Topic: {topic}") 15 | 16 | # Check if we have report content to work with 17 | if not report_content: 18 | print("Error: No report content provided for YouTube description generation.") 19 | log_to_file("YouTube Desc Error: No report content provided.") 20 | return None 21 | 22 | guidance_text = f"\n**Additional Guidance:** {args.guidance}\n" if args.guidance else "" 23 | prompt = ( 24 | f"You are an AI assistant tasked with creating a YouTube video description based on an existing comprehensive report about '{topic}'.{guidance_text}\n" 25 | f"**Topic:** {topic}\n" 26 | f"{guidance_text}\n" 27 | f"**Task:**\n" 28 | f"Transform the provided comprehensive report into a YouTube description-friendly format. The output should be:\n" 29 | f"- Concise and scannable with bullet points and clear sections\n" 30 | f"- Simplified technical terms for general audience\n" 31 | f"- Key information organized by categories/sections\n" 32 | f"- Quick reference format suitable for video notes\n" 33 | f"- Actionable insights and specific recommendations\n" 34 | f"- YouTube-compatible plain text formatting (NO MARKDOWN)\n\n" 35 | f"Structure the YouTube description with:\n" 36 | f"1. Brief intro explaining what the topic covers\n" 37 | f"2. Key points organized in bullet format\n" 38 | f"3. Main categories/sections from the report\n" 39 | f"4. Quick takeaways and recommendations\n" 40 | f"5. Simplified explanations of technical concepts\n\n" 41 | f"IMPORTANT FORMATTING RULES FOR YOUTUBE:\n" 42 | f"- Use UPPERCASE for section headers (not ## or ###)\n" 43 | f"- Use • or - for bullet points (not * in markdown)\n" 44 | f"- NO bold (**text**), italics (*text*), or other markdown formatting\n" 45 | f"- Use emojis for visual appeal and section separation\n" 46 | f"- Use line breaks and spacing for readability\n" 47 | f"- Plain text only - YouTube descriptions don't support markdown\n\n" 48 | f"**Source Report Content:**\n" 49 | f"---\n{report_content}\n---\n\n" 50 | f"CRITICAL FORMATTING RULES (OUTPUT MUST FOLLOW EXACTLY):\n" 51 | f"1. OUTPUT TAG: You MUST enclose the entire YouTube description content within a single pair of `` tags.\n" 52 | f"2. CONTENT: Use PLAIN TEXT formatting only - YouTube does NOT support markdown.\n" 53 | f"3. STYLE: Write in a friendly, accessible tone suitable for YouTube audience.\n" 54 | f"4. LENGTH: Keep it comprehensive but scannable - aim for detailed notes that viewers can quickly reference.\n" 55 | f"5. FORMATTING: Use UPPERCASE for headers, • or - for bullets, emojis for visual appeal, and line breaks for spacing.\n" 56 | f"6. 
NO MARKDOWN: Absolutely no **bold**, *italic*, ##headers##, or other markdown syntax.\n" 57 | f"7. NO EXTRA TEXT: ONLY include the YouTube description text inside the `` tags. ABSOLUTELY NO other text, introductory phrases, explanations, or thinking tags should be present anywhere in the final output.\n\n" 58 | f"Remember: The entire output MUST be ONLY the YouTube description text enclosed in a single `` tag. Use PLAIN TEXT formatting only - no markdown syntax whatsoever since YouTube descriptions don't support it." 59 | ) 60 | 61 | # Save YouTube description prompt 62 | if run_archive_dir: 63 | prompt_filename = os.path.join(run_archive_dir, "youtube_description_prompt.txt") 64 | try: 65 | with open(prompt_filename, 'w', encoding='utf-8') as pf: 66 | pf.write(prompt) 67 | log_to_file(f"Saved YouTube description prompt to {prompt_filename}") 68 | except IOError as e: 69 | log_to_file(f"Warning: Could not save YouTube description prompt: {e}") 70 | 71 | # Call AI 72 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name="YouTubeDescriptionGeneration", timeout=args.ai_timeout, retries=args.ai_retries) 73 | 74 | # Save raw response 75 | if run_archive_dir and raw_response: 76 | raw_resp_filename = os.path.join(run_archive_dir, "youtube_description_response_raw.txt") 77 | try: 78 | with open(raw_resp_filename, 'w', encoding='utf-8') as rf: 79 | rf.write(raw_response) 80 | log_to_file(f"Saved YouTube description raw response to {raw_resp_filename}") 81 | except IOError as e: 82 | log_to_file(f"Warning: Could not save YouTube description raw response: {e}") 83 | 84 | if not cleaned_response: 85 | print("\nError: Failed to generate YouTube description from AI (empty cleaned response).") 86 | log_to_file("YouTube Desc Error: Failed (empty cleaned response).") 87 | return None 88 | 89 | # Parse the response - Find last tag after cleaning tags 90 | cleaned_text_for_youtube = clean_thinking_tags(cleaned_response) 91 | youtube_description_text = None 92 | if cleaned_text_for_youtube: 93 | last_opening_tag_index = cleaned_text_for_youtube.rfind('') 94 | if last_opening_tag_index != -1: 95 | closing_tag_index = cleaned_text_for_youtube.find('', last_opening_tag_index) 96 | if closing_tag_index != -1: 97 | start_content = last_opening_tag_index + len('') 98 | youtube_description_text = cleaned_text_for_youtube[start_content:closing_tag_index].strip() 99 | 100 | if not youtube_description_text: # Check if parsing failed or resulted in empty string 101 | print("\nError: Could not parse valid content from the AI response.") 102 | log_to_file(f"YouTube Desc Error: Failed to parse tag or content was empty.\nCleaned Response was:\n{cleaned_text_for_youtube}") 103 | # Save the failed YouTube description output for debugging 104 | if run_archive_dir: 105 | failed_youtube_path = os.path.join(run_archive_dir, "youtube_description_FAILED_PARSE.txt") 106 | try: 107 | with open(failed_youtube_path, 'w', encoding='utf-8') as fyf: 108 | fyf.write(cleaned_text_for_youtube or "Original cleaned response was empty.") 109 | except IOError: 110 | pass 111 | return None 112 | 113 | # Save the YouTube description 114 | final_youtube_filename = "youtube_description.md" 115 | final_youtube_filepath = os.path.join(run_archive_dir, final_youtube_filename) if run_archive_dir else final_youtube_filename 116 | 117 | try: 118 | with open(final_youtube_filepath, 'w', encoding='utf-8') as ef: 119 | ef.write(youtube_description_text) 120 | print(f"Saved generated YouTube description to {final_youtube_filepath}") 121 | 
log_to_file(f"YouTube description saved to {final_youtube_filepath}") 122 | return final_youtube_filepath 123 | except IOError as e: 124 | print(f"\nError: Could not save generated YouTube description to {final_youtube_filepath}: {e}") 125 | log_to_file(f"YouTube Desc Saving Error: Failed to save YouTube description to {final_youtube_filepath}: {e}") 126 | # Try saving to CWD as fallback ONLY if archive failed 127 | if run_archive_dir: 128 | try: 129 | cwd_filename = final_youtube_filename 130 | with open(cwd_filename, 'w', encoding='utf-8') as ef_cwd: 131 | ef_cwd.write(youtube_description_text) 132 | print(f"Saved generated YouTube description to {cwd_filename} (in CWD as fallback)") 133 | log_to_file(f"YouTube description saved to CWD fallback: {cwd_filename}") 134 | return cwd_filename 135 | except IOError as e_cwd: 136 | print(f"\nError: Could not save YouTube description to CWD fallback path either: {e_cwd}") 137 | log_to_file(f"YouTube Desc Saving Error: Failed to save YouTube description to CWD fallback: {e_cwd}") 138 | return None 139 | else: 140 | return None -------------------------------------------------------------------------------- /templates/main_dashboard.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Ecne AI Podcaster Control Panel 7 | 8 | 9 | 10 |
[Template text lost during extraction: only the text nodes of main_dashboard.html survive. Recoverable structure: a page titled "Ecne AI Podcaster Control Panel" with the tagline "Welcome to your centralized hub for generating podcast scripts and audio/video content.", navigation markup whose text was stripped (original template lines ~13-34), and a "🐳 Orpheus TTS Service Status" panel whose indicator initially reads "Checking..." and is updated by inline JavaScript (original template lines ~49-196).]
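The dashboard's status panel implies a lightweight reachability check against the Orpheus-FastAPI server. A hedged sketch of such a probe is shown below, using the default host and port from functions/tts/args.py; the probed path is an assumption, not an endpoint documented in this repository.

import requests

def orpheus_is_up(host: str = "127.0.0.1", port: int = 5005, timeout: float = 3.0) -> bool:
    """Return True if the Orpheus-FastAPI server answers at all (assumed probe URL)."""
    try:
        response = requests.get(f"http://{host}:{port}/", timeout=timeout)
        return response.status_code < 500
    except requests.exceptions.RequestException:
        return False

if __name__ == "__main__":
    print("Orpheus TTS service reachable:", orpheus_is_up())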
48 | 49 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /functions/processing/report_generation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json # Used for logging raw response 4 | 5 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 6 | from ..utils import log_to_file, clean_thinking_tags # Import utilities 7 | 8 | def generate_report(summaries_with_scores, reference_docs_content, topic, config, args): 9 | """Uses AI to generate a written report/paper based on summaries and optionally full reference docs.""" 10 | # Access run_archive_dir from the global scope via utils 11 | from ..utils import run_archive_dir 12 | 13 | print("\nGenerating report via AI...") 14 | log_to_file(f"Starting report generation. Topic: {topic}") 15 | 16 | # --- Process Summaries --- 17 | # Use all valid summaries (which might include summarized ref docs), sorted by score 18 | valid_summaries = [s for s in summaries_with_scores if s['score'] >= 0 and not s['summary'].startswith("Error:")] 19 | num_summaries_used = 0 20 | combined_summaries_text = "No valid summaries were generated or met the criteria." 21 | 22 | if valid_summaries: 23 | top_summaries = sorted(valid_summaries, key=lambda x: x['score'], reverse=True) 24 | num_summaries_used = len(top_summaries) 25 | print(f"Using {num_summaries_used} summaries for report generation.") 26 | log_to_file(f"Report Gen: Using {num_summaries_used} valid summaries.") 27 | combined_summaries_text = "\n\n".join([ 28 | # Include source info in the report prompt context as well 29 | f"Summary {i+1} (Source: {s['source_id']}, Type: {s['type']}, Score: {s['score']}):\n{s['summary']}" 30 | for i, s in enumerate(top_summaries) 31 | ]) 32 | else: 33 | print("Warning: No valid summaries available for report generation.") 34 | log_to_file("Report Gen Warning: No valid summaries found.") 35 | # We might still proceed if full reference docs are available 36 | 37 | # --- Process Full Reference Documents (If Not Summarized) --- 38 | full_reference_docs_text = "" 39 | num_ref_docs_used = 0 40 | if reference_docs_content and not args.reference_docs_summarize: 41 | num_ref_docs_used = len(reference_docs_content) 42 | print(f"Including {num_ref_docs_used} full reference documents directly in the report prompt.") 43 | log_to_file(f"Report Gen: Including {num_ref_docs_used} full reference documents.") 44 | full_reference_docs_text = "\n\n---\n\n".join([ 45 | f"Reference Document (Path: {doc['path']}):\n{doc['content']}" 46 | for doc in reference_docs_content 47 | ]) 48 | # Add a header for clarity in the prompt 49 | full_reference_docs_text = f"**Full Reference Documents (Use for context):**\n---\n{full_reference_docs_text}\n---" 50 | 51 | # Check if we have *any* content to generate from 52 | if num_summaries_used == 0 and num_ref_docs_used == 0: 53 | print("Error: No summaries or reference documents available to generate report.") 54 | log_to_file("Report Gen Error: No summaries or reference documents available for context.") 55 | return None # Cannot generate report without context 56 | 57 | guidance_text = f"\n**Additional Guidance:** {args.guidance}\n" if args.guidance else "" 58 | prompt = ( 59 | f"You are an AI assistant tasked with writing a well-structured, informative research paper/report on the topic: '{topic}'.{guidance_text}\n" 60 | f"**Topic:** {topic}\n" 61 | f"{guidance_text}\n" # Add guidance here as well for clarity 62 | 
f"**Task:**\n" 63 | f"Generate a comprehensive, well-structured, and informative research paper/report based *thoroughly* on the provided context (summaries and/or full reference documents). Synthesize the information, identify key themes, arguments, evidence, and supporting details (including specific statistics, names, dates, or benchmarks mentioned). Structure the report logically with an introduction (defining the topic and scope), body paragraphs (each exploring a specific facet or theme derived from the context, citing evidence implicitly), and a conclusion (summarizing key findings and potential implications or future directions). Maintain an objective, formal, and informative tone suitable for a research report. **Crucially, this must be a written report/essay format, NOT a script or dialogue.**\n\n" 64 | f"**Context for Report Generation (Analyze ALL):**\n\n" 65 | f"--- Summaries (Analyze these first) ---\n{combined_summaries_text}\n---\n\n" 66 | f"{full_reference_docs_text}\n\n" # This will be empty if no full docs were used 67 | f"**CRITICAL FORMATTING RULES (OUTPUT MUST FOLLOW EXACTLY):**\n" 68 | f"1. **OUTPUT TAG:** You MUST enclose the *entire* report content within a single pair of `` tags.\n" 69 | f"2. **CONTENT:** The content should be well-written, coherent, and directly based on the provided summaries.\n" 70 | f"3. **NO EXTRA TEXT:** ONLY include the report text inside the `` tags. **ABSOLUTELY NO** other text, introductory phrases, explanations, or thinking tags (`...`) should be present anywhere in the final output.\n\n" 71 | f"Remember: The entire output MUST be ONLY the report text enclosed in a single `` tag." 72 | ) 73 | 74 | # Save report prompt 75 | if run_archive_dir: 76 | prompt_filename = os.path.join(run_archive_dir, "report_prompt.txt") 77 | try: 78 | with open(prompt_filename, 'w', encoding='utf-8') as pf: pf.write(prompt) 79 | log_to_file(f"Saved report prompt to {prompt_filename}") 80 | except IOError as e: log_to_file(f"Warning: Could not save report prompt: {e}") 81 | 82 | # Call AI 83 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name="ReportGeneration", timeout=args.ai_timeout, retries=args.ai_retries) 84 | 85 | # Save raw response 86 | if run_archive_dir and raw_response: 87 | raw_resp_filename = os.path.join(run_archive_dir, "report_response_raw.txt") 88 | try: 89 | with open(raw_resp_filename, 'w', encoding='utf-8') as rf: rf.write(raw_response) 90 | log_to_file(f"Saved report raw response to {raw_resp_filename}") 91 | except IOError as e: log_to_file(f"Warning: Could not save report raw response: {e}") 92 | 93 | if not cleaned_response: 94 | print("\nError: Failed to generate report from AI (empty cleaned response).") 95 | log_to_file("Report Gen Error: Failed (empty cleaned response).") 96 | return None 97 | 98 | # Parse the response - Find last tag after cleaning tags 99 | cleaned_text_for_report = clean_thinking_tags(cleaned_response) 100 | report_text = None 101 | if cleaned_text_for_report: 102 | last_opening_tag_index = cleaned_text_for_report.rfind('') 103 | if last_opening_tag_index != -1: 104 | closing_tag_index = cleaned_text_for_report.find('', last_opening_tag_index) 105 | if closing_tag_index != -1: 106 | start_content = last_opening_tag_index + len('') 107 | report_text = cleaned_text_for_report[start_content:closing_tag_index].strip() 108 | 109 | if not report_text: # Check if parsing failed or resulted in empty string 110 | print("\nError: Could not parse valid content from the AI response.") 111 | 
log_to_file(f"Report Gen Error: Failed to parse tag or content was empty.\nCleaned Response was:\n{cleaned_text_for_report}") 112 | # Save the failed report output for debugging 113 | if run_archive_dir: 114 | failed_report_path = os.path.join(run_archive_dir, "report_FAILED_PARSE.txt") 115 | try: 116 | with open(failed_report_path, 'w', encoding='utf-8') as frf: frf.write(cleaned_text_for_report or "Original cleaned response was empty.") 117 | except IOError: pass 118 | return None 119 | 120 | # Save the report 121 | final_report_filename = "podcast_report.txt" 122 | final_report_filepath = os.path.join(run_archive_dir, final_report_filename) if run_archive_dir else final_report_filename 123 | 124 | try: 125 | with open(final_report_filepath, 'w', encoding='utf-8') as ef: 126 | ef.write(report_text) 127 | print(f"Saved generated report to {final_report_filepath}") 128 | log_to_file(f"Report saved to {final_report_filepath}") 129 | return final_report_filepath 130 | except IOError as e: 131 | print(f"\nError: Could not save generated report to {final_report_filepath}: {e}") 132 | log_to_file(f"Report Saving Error: Failed to save report to {final_report_filepath}: {e}") 133 | # Try saving to CWD as fallback ONLY if archive failed 134 | if run_archive_dir: 135 | try: 136 | cwd_filename = final_report_filename 137 | with open(cwd_filename, 'w', encoding='utf-8') as ef_cwd: ef_cwd.write(report_text) 138 | print(f"Saved generated report to {cwd_filename} (in CWD as fallback)") 139 | log_to_file(f"Report saved to CWD fallback: {cwd_filename}") 140 | return cwd_filename 141 | except IOError as e_cwd: 142 | print(f"\nError: Could not save report to CWD fallback path either: {e_cwd}") 143 | log_to_file(f"Report Saving Error: Failed to save report to CWD fallback: {e_cwd}") 144 | return None 145 | else: 146 | return None 147 | -------------------------------------------------------------------------------- /templates/podcast_builder_form.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Podcast Audio/Video Generator 7 | 8 | 9 | 10 |
11 |

Podcast Audio/Video Generator

12 | 13 | 16 | 17 |
18 | 19 |

Script Input

20 |
21 | 22 | 25 |
26 | 30 | 31 |

Voice Settings

32 |
33 | 34 | 35 |
36 |
37 | 38 | 39 |
40 |
41 | 42 | 43 |
44 |
45 | 46 | 47 |
48 |
49 | 50 | 51 |
52 | 53 | 54 |
55 |

Quality Presets

56 |
57 | 58 | 64 |
65 | 66 |

API Settings

67 |
68 | 69 | 70 |
71 |
72 | 73 | 74 |
75 |
76 | 77 | 78 |
79 |
80 | 81 | 82 |
83 | 84 |

Output & Video Settings

85 |
86 | 87 | 88 |
89 |
90 | 91 | 96 |
97 |
98 | 99 | 106 |
107 |
108 | 109 | 110 |
111 |
112 | 113 | 114 |
115 |
116 | 117 | 122 |
123 |
124 | 125 | 130 |
131 |
132 | 133 | 138 |
139 |
140 | 141 | 142 |
143 |
144 | 145 | 146 |
147 |
148 | 149 | 150 | 151 |
152 | 153 |
154 |

Or Resume Existing Podcast

155 |
156 | 159 | 160 |
161 |
162 | 163 | 168 |
169 | 170 | 171 | 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /functions/tts/processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess # Added for subprocess.run 3 | import shlex 4 | import tempfile 5 | import shutil 6 | import numpy as np 7 | import soundfile as sf 8 | 9 | try: 10 | from pydub import AudioSegment 11 | pydub_available = True 12 | except ImportError: 13 | print("Warning: 'pydub' library not found. Audio manipulation (gain, trim, padding) disabled.") 14 | pydub_available = False 15 | 16 | def apply_audio_enhancements(audio_path, config, temp_dir): 17 | """ 18 | Applies FFmpeg enhancements (noise reduction, compression, normalization, de-essing) 19 | and pydub processing (gain, trim, padding) to an audio file. 20 | 21 | Args: 22 | audio_path (str): Path to the input audio file. 23 | config (dict): Dictionary containing processing parameters (e.g., nr_level, gain_factor, etc.). 24 | temp_dir (str): Path to a temporary directory for intermediate files. 25 | 26 | Returns: 27 | tuple: (path_to_processed_file, samplerate) or (None, None) on failure. 28 | """ 29 | processed_audio_path = audio_path 30 | samplerate = None 31 | 32 | if not os.path.exists(audio_path): 33 | print(f"Error: Input audio file not found for processing: {audio_path}") 34 | return None, None 35 | 36 | # Get samplerate from the initial audio file 37 | try: 38 | info = sf.info(audio_path) 39 | samplerate = info.samplerate 40 | except Exception as e: 41 | print(f"Error getting samplerate from {audio_path}: {e}") 42 | return None, None 43 | 44 | # --- FFmpeg Enhancement (Conditional) --- 45 | final_apply_ffmpeg = config.get('apply_ffmpeg_enhancement', True) 46 | final_apply_deesser = config.get('apply_deesser', True) # Default ON 47 | final_deesser_freq = config.get('deesser_freq', 3000) 48 | final_nr_level = config.get('nr_level', 0) 49 | final_compress_thresh = config.get('compress_thresh', 1.0) 50 | final_compress_ratio = config.get('compress_ratio', 1) 51 | final_norm_frame_len = config.get('norm_frame_len', 10) 52 | final_norm_gauss_size = config.get('norm_gauss_size', 3) 53 | 54 | ffmpeg_temp_path = None 55 | 56 | if final_apply_ffmpeg: 57 | # Ensure final norm_gauss_size is odd 58 | if final_norm_gauss_size % 2 == 0: 59 | final_norm_gauss_size -= 1 60 | print(f" Adjusting Norm Gauss size from {final_norm_gauss_size + 1} to {final_norm_gauss_size} (must be odd).") 61 | 62 | ffmpeg_temp_fd, ffmpeg_temp_path = tempfile.mkstemp(suffix="_ffmpeg.wav", prefix="segment_", dir=temp_dir) 63 | os.close(ffmpeg_temp_fd) 64 | 65 | try: 66 | filter_chain = [] 67 | 68 | if final_apply_deesser: 69 | filter_chain.append(f"firequalizer=gain='if(gte(f,{final_deesser_freq}),-5,0)'") 70 | 71 | if final_nr_level > 0: 72 | filter_chain.append(f"afftdn=nr={final_nr_level}") 73 | 74 | comp_thresh_str = f"{final_compress_thresh:.3f}" 75 | filter_chain.append(f"acompressor=threshold={comp_thresh_str}:ratio={final_compress_ratio}:attack=10:release=100") 76 | 77 | filter_chain.append(f"dynaudnorm=f={final_norm_frame_len}:g={final_norm_gauss_size}") 78 | 79 | audio_filter = ','.join(filter_chain) 80 | 81 | ffmpeg_command = [ 82 | 'ffmpeg', 83 | '-i', audio_path, 84 | '-af', audio_filter, 85 | '-y', 86 | ffmpeg_temp_path 87 | ] 88 | print(f" Attempting FFmpeg enhancement: {' '.join(shlex.quote(arg) for arg in ffmpeg_command)}") 89 | result = subprocess.run(ffmpeg_command, capture_output=True, 
text=True, check=False) 90 | 91 | if result.returncode == 0 and os.path.exists(ffmpeg_temp_path) and os.path.getsize(ffmpeg_temp_path) > 44: 92 | processed_audio_path = ffmpeg_temp_path 93 | print(f" -> SUCCESS: FFmpeg enhancement saved to: {os.path.basename(ffmpeg_temp_path)}") 94 | else: 95 | print(f" !! Warning: FFmpeg processing failed or produced empty file. Using original audio.") 96 | print(f" Return Code: {result.returncode}") 97 | print(f" Stderr: {result.stderr.strip()}") 98 | if os.path.exists(ffmpeg_temp_path): 99 | try: os.remove(ffmpeg_temp_path) 100 | except OSError: pass 101 | ffmpeg_temp_path = None 102 | except FileNotFoundError: 103 | print(f" !! Error: 'ffmpeg' command not found. Skipping enhancement.") 104 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 105 | try: os.remove(ffmpeg_temp_path) 106 | except OSError: pass 107 | ffmpeg_temp_path = None 108 | except Exception as ffmpeg_e: 109 | print(f" !! Warning: Error running FFmpeg processing: {ffmpeg_e}. Skipping enhancement.") 110 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 111 | try: os.remove(ffmpeg_temp_path) 112 | except OSError: pass 113 | ffmpeg_temp_path = None 114 | else: 115 | print(" -> Skipping FFmpeg enhancement as requested.") 116 | 117 | # --- Pydub Processing (Gain, Trim, Pad) --- 118 | final_gain_factor = config.get('gain_factor', 1.0) 119 | final_trim_end_ms = config.get('trim_end_ms', 0) 120 | pad_end_ms = config.get('pad_end_ms', 0) 121 | 122 | if pydub_available: 123 | try: 124 | print(f" Processing with pydub (Gain, Trim, Pad) on: {os.path.basename(processed_audio_path)}...") 125 | segment = AudioSegment.from_wav(processed_audio_path) 126 | samplerate = segment.frame_rate 127 | 128 | if final_gain_factor != 1.0 and final_gain_factor > 0: 129 | print(f" -> Applying gain: {final_gain_factor:.2f}x") 130 | gain_db = 20 * np.log10(final_gain_factor) 131 | segment = segment + gain_db 132 | 133 | if final_trim_end_ms > 0 and len(segment) > final_trim_end_ms: 134 | print(f" -> Trimming {final_trim_end_ms}ms from end.") 135 | segment = segment[:-final_trim_end_ms] 136 | elif final_trim_end_ms > 0: 137 | print(f" -> Warning: Segment length ({len(segment)}ms) is less than trim duration ({final_trim_end_ms}ms). Skipping trim.") 138 | 139 | if pad_end_ms > 0: 140 | print(f" -> Padding {pad_end_ms}ms silence to end.") 141 | padding = AudioSegment.silent(duration=pad_end_ms, frame_rate=samplerate) 142 | segment = segment + padding 143 | else: 144 | print(f" -> No end padding requested (pad_end_ms={pad_end_ms}).") 145 | 146 | # Create a new temp file for the final pydub output 147 | pydub_temp_fd, pydub_temp_path = tempfile.mkstemp(suffix="_pydub.wav", prefix="segment_", dir=temp_dir) 148 | os.close(pydub_temp_fd) 149 | 150 | print(f" -> Exporting final processed audio to {os.path.basename(pydub_temp_path)}") 151 | segment.export(pydub_temp_path, format="wav") 152 | duration = len(segment) / 1000.0 153 | print(f" -> Final segment saved ({duration:.2f}s, SR: {samplerate} Hz)") 154 | 155 | # Cleanup the intermediate FFmpeg file if it was created 156 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 157 | try: os.remove(ffmpeg_temp_path) 158 | except OSError as e: print(f" Warning: Could not remove ffmpeg temp file {ffmpeg_temp_path}: {e}") 159 | 160 | return pydub_temp_path, samplerate 161 | 162 | except Exception as pydub_e: 163 | print(f"!! Error during pydub processing: {pydub_e}") 164 | print(f"!! 
Falling back to using the pre-pydub audio: {os.path.basename(processed_audio_path)}") 165 | # If pydub fails, try to copy the ffmpeg/initial file to the final path 166 | try: 167 | final_fd, final_path_on_error = tempfile.mkstemp(suffix="_final_fallback.wav", prefix="segment_", dir=temp_dir) 168 | os.close(final_fd) 169 | shutil.copy2(processed_audio_path, final_path_on_error) 170 | # Need to get samplerate if we didn't get it from pydub 171 | if samplerate is None: 172 | with sf.SoundFile(final_path_on_error) as audio_info: 173 | samplerate = audio_info.samplerate 174 | 175 | # Cleanup the intermediate FFmpeg file if it was created 176 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 177 | try: os.remove(ffmpeg_temp_path) 178 | except OSError as e: print(f" Warning: Could not remove ffmpeg temp file {ffmpeg_temp_path}: {e}") 179 | 180 | return final_path_on_error, samplerate 181 | except Exception as copy_e: 182 | print(f"!! Error copying fallback audio: {copy_e}") 183 | return None, None 184 | else: 185 | print("!! Pydub not available. Skipping gain, trim, and padding.") 186 | # If pydub is not available, the processed_audio_path (from FFmpeg or initial) is the final one. 187 | # We should copy it to a new temp file to ensure it's not the original input file. 188 | try: 189 | final_fd, final_path_no_pydub = tempfile.mkstemp(suffix="_final_nopydub.wav", prefix="segment_", dir=temp_dir) 190 | os.close(final_fd) 191 | shutil.copy2(processed_audio_path, final_path_no_pydub) 192 | 193 | # Cleanup the intermediate FFmpeg file if it was created 194 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 195 | try: os.remove(ffmpeg_temp_path) 196 | except OSError as e: print(f" Warning: Could not remove ffmpeg temp file {ffmpeg_temp_path}: {e}") 197 | 198 | return final_path_no_pydub, samplerate 199 | except Exception as copy_e: 200 | print(f"!! Error copying non-pydub audio: {copy_e}") 201 | return None, None -------------------------------------------------------------------------------- /functions/scraping/documents.py: -------------------------------------------------------------------------------- 1 | import os 2 | import PyPDF2 # For PDF processing 3 | import docx # For DOCX processing 4 | 5 | from ..utils import log_to_file # Import log_to_file from the utils module 6 | 7 | def load_reference_documents(args): 8 | """ 9 | Loads content from specified reference documents (txt, pdf, docx) or from a folder. 10 | Returns a list of dictionaries with 'path' and 'content'. 
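    Example (illustrative sketch; the file names below are hypothetical, and the
    args object only needs the two attributes this function reads):

        from types import SimpleNamespace
        args = SimpleNamespace(
            reference_docs="notes.txt,paper.pdf",                  # comma-separated paths, or None
            reference_docs_folder="research/Example_Docs_Folder",  # folder of .txt/.pdf/.docx files, or None
        )
        docs = load_reference_documents(args)
        # docs -> [{"path": "notes.txt", "content": "..."}, ...]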
11 | """ 12 | reference_docs_content = [] 13 | processed_paths = set() # To avoid processing the same file twice if specified by both args 14 | 15 | # --- Load Reference Documents from comma-separated paths --- 16 | if args.reference_docs: 17 | print("\nLoading reference documents from paths...") 18 | log_to_file(f"Attempting to load reference documents from paths: {args.reference_docs}") 19 | ref_doc_paths = [p.strip() for p in args.reference_docs.split(',') if p.strip()] 20 | for doc_path in ref_doc_paths: 21 | full_doc_path = os.path.abspath(doc_path) # Get absolute path for consistent tracking 22 | if full_doc_path in processed_paths: 23 | print(f" - Skipping already processed document: {doc_path}") 24 | log_to_file(f"Skipping already processed document: {doc_path}") 25 | continue 26 | 27 | content = None 28 | try: 29 | print(f" - Processing reference document: {doc_path}") 30 | if doc_path.lower().endswith('.pdf'): 31 | # PDF processing 32 | text_content = [] 33 | with open(doc_path, 'rb') as pdf_file: # Open in binary mode 34 | reader = PyPDF2.PdfReader(pdf_file) # Use PdfReader 35 | if reader.is_encrypted: 36 | print(f" - Warning: Skipping encrypted PDF: {doc_path}") 37 | log_to_file(f"Warning: Skipping encrypted PDF: {doc_path}") 38 | continue # Skip encrypted PDFs 39 | for page in reader.pages: 40 | page_text = page.extract_text() 41 | if page_text: # Ensure text was extracted 42 | text_content.append(page_text) 43 | content = "\n".join(text_content) 44 | print(f" - Extracted text from PDF.") 45 | elif doc_path.lower().endswith('.docx'): 46 | # DOCX processing 47 | doc = docx.Document(doc_path) 48 | text_content = [para.text for para in doc.paragraphs if para.text] # Filter empty paragraphs 49 | content = "\n".join(text_content) 50 | print(f" - Extracted text from DOCX.") 51 | else: # Assume plain text for .txt or unknown/other extensions 52 | if not doc_path.lower().endswith('.txt'): 53 | print(f" - Warning: Unknown extension for '{doc_path}', attempting to read as plain text.") 54 | log_to_file(f"Warning: Unknown extension for reference doc '{doc_path}', reading as text.") 55 | with open(doc_path, 'r', encoding='utf-8') as f: 56 | content = f.read() 57 | print(f" - Read as plain text.") 58 | 59 | # Process extracted content 60 | if content and content.strip(): 61 | reference_docs_content.append({"path": doc_path, "content": content.strip()}) 62 | processed_paths.add(full_doc_path) 63 | print(f" - Successfully loaded content ({len(content)} chars).") 64 | log_to_file(f"Loaded reference doc: {doc_path} ({len(content)} chars)") 65 | else: 66 | print(f" - Warning: No text content extracted or file is empty: {doc_path}") 67 | log_to_file(f"Warning: Reference document {doc_path} empty or no text extracted.") 68 | 69 | except FileNotFoundError: 70 | print(f" - Error: Reference document file not found: {doc_path}") 71 | log_to_file(f"Error: Reference document file not found: {doc_path}") 72 | except PyPDF2.errors.PdfReadError as pdf_err: # Catch specific PyPDF2 errors 73 | print(f" - Error reading PDF file {doc_path}: {pdf_err}") 74 | log_to_file(f"Error reading PDF file {doc_path}: {pdf_err}") 75 | except Exception as e: # General catch-all 76 | print(f" - Error processing reference document {doc_path}: {e}") 77 | log_to_file(f"Error processing reference document {doc_path}: {e} (Type: {type(e).__name__})") 78 | 79 | if not reference_docs_content and args.reference_docs: 80 | print("Warning: No valid reference documents were loaded from paths despite --reference-docs being set.") 81 | 
log_to_file("Warning: --reference-docs set, but no content loaded from paths.") 82 | 83 | 84 | # --- Load Reference Documents from Folder --- 85 | if args.reference_docs_folder: 86 | print(f"\nLoading reference documents from folder: {args.reference_docs_folder}") 87 | log_to_file(f"Attempting to load reference documents from folder: {args.reference_docs_folder}") 88 | if not os.path.isdir(args.reference_docs_folder): 89 | print(f" - Error: Provided path is not a valid directory: {args.reference_docs_folder}") 90 | log_to_file(f"Error: --reference-docs-folder path is not a directory: {args.reference_docs_folder}") 91 | else: 92 | for filename in os.listdir(args.reference_docs_folder): 93 | doc_path = os.path.join(args.reference_docs_folder, filename) 94 | full_doc_path = os.path.abspath(doc_path) # Get absolute path for consistent tracking 95 | 96 | if not os.path.isfile(doc_path): 97 | continue # Skip subdirectories 98 | 99 | if full_doc_path in processed_paths: 100 | print(f" - Skipping already processed document: {doc_path}") 101 | log_to_file(f"Skipping already processed document: {doc_path}") 102 | continue 103 | 104 | content = None 105 | file_ext = os.path.splitext(filename)[1].lower() 106 | 107 | try: 108 | print(f" - Processing reference document: {doc_path}") 109 | if file_ext == '.pdf': 110 | # PDF processing 111 | text_content = [] 112 | with open(doc_path, 'rb') as pdf_file: 113 | reader = PyPDF2.PdfReader(pdf_file) 114 | if reader.is_encrypted: 115 | print(f" - Warning: Skipping encrypted PDF: {doc_path}") 116 | log_to_file(f"Warning: Skipping encrypted PDF: {doc_path}") 117 | continue 118 | for page in reader.pages: 119 | page_text = page.extract_text() 120 | if page_text: 121 | text_content.append(page_text) 122 | content = "\n".join(text_content) 123 | print(f" - Extracted text from PDF.") 124 | elif file_ext == '.docx': 125 | # DOCX processing 126 | doc = docx.Document(doc_path) 127 | text_content = [para.text for para in doc.paragraphs if para.text] 128 | content = "\n".join(text_content) 129 | print(f" - Extracted text from DOCX.") 130 | elif file_ext == '.txt': 131 | # TXT processing 132 | with open(doc_path, 'r', encoding='utf-8') as f: 133 | content = f.read() 134 | print(f" - Read as plain text.") 135 | else: 136 | print(f" - Skipping unsupported file type: {filename}") 137 | log_to_file(f"Skipping unsupported file type in reference folder: {filename}") 138 | continue # Skip unsupported files 139 | 140 | # Process extracted content 141 | if content and content.strip(): 142 | reference_docs_content.append({"path": doc_path, "content": content.strip()}) 143 | processed_paths.add(full_doc_path) 144 | print(f" - Successfully loaded content ({len(content)} chars).") 145 | log_to_file(f"Loaded reference doc from folder: {doc_path} ({len(content)} chars)") 146 | else: 147 | print(f" - Warning: No text content extracted or file is empty: {doc_path}") 148 | log_to_file(f"Warning: Reference document {doc_path} from folder is empty or no text extracted.") 149 | 150 | except FileNotFoundError: # Should not happen with listdir unless race condition 151 | print(f" - Error: Reference document file not found unexpectedly: {doc_path}") 152 | log_to_file(f"Error: Reference document file not found unexpectedly: {doc_path}") 153 | except PyPDF2.errors.PdfReadError as pdf_err: 154 | print(f" - Error reading PDF file {doc_path}: {pdf_err}") 155 | log_to_file(f"Error reading PDF file {doc_path} from folder: {pdf_err}") 156 | except Exception as e: 157 | print(f" - Error processing 
reference document {doc_path}: {e}") 158 | log_to_file(f"Error processing reference document {doc_path} from folder: {e} (Type: {type(e).__name__})") 159 | 160 | log_to_file(f"Finished processing reference documents folder. Total loaded: {len(reference_docs_content)}") 161 | 162 | if not reference_docs_content and (args.reference_docs or args.reference_docs_folder): 163 | print("Warning: No valid reference documents were loaded from specified paths or folder.") 164 | log_to_file("Warning: Reference docs/folder specified, but no content loaded.") 165 | 166 | 167 | return reference_docs_content -------------------------------------------------------------------------------- /functions/scraping/reddit.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import urllib.parse 4 | import os # Import os module 5 | import time 6 | import random 7 | import urllib.parse 8 | from selenium import webdriver 9 | from selenium.webdriver.common.by import By 10 | from selenium.webdriver.chrome.service import Service as ChromeService 11 | from selenium.webdriver.support.ui import WebDriverWait 12 | from selenium.webdriver.support import expected_conditions as EC 13 | from selenium.common.exceptions import TimeoutException, NoSuchElementException 14 | 15 | from ..utils import log_to_file, USER_AGENTS # Import utilities 16 | 17 | def scrape_reddit_source(subreddit_name, search_queries, args, seen_urls_global, source_scrape_limit): 18 | """ 19 | Scrapes content from a specific subreddit using Selenium. 20 | Returns a list of scraped text content from posts/comments. 21 | Updates the seen_urls_global set. 22 | """ 23 | print(f" - Processing Reddit source '{subreddit_name}' using Selenium/old.reddit.com...") 24 | log_to_file(f"Initiating Selenium scrape for r/{subreddit_name}") 25 | driver = None 26 | all_post_links_for_subreddit = set() 27 | reddit_texts = [] # Store texts scraped from this source 28 | source_texts_count = 0 # Track count for this source 29 | 30 | # Determine the path to chromedriver within the virtual environment 31 | # This script is in Ecne-AI-Podcasterv2/functions/scraping/ 32 | # The venv is in Ecne-AI-Podcasterv2/host_venv/ 33 | # Chromedriver will be in Ecne-AI-Podcasterv2/host_venv/bin/ 34 | script_dir = os.path.dirname(os.path.abspath(__file__)) 35 | # Go up two directories (from functions/scraping to Ecne-AI-Podcasterv2) 36 | project_root = os.path.abspath(os.path.join(script_dir, '..', '..')) 37 | chromedriver_path = os.path.join(project_root, 'host_venv', 'bin', 'chromedriver') 38 | 39 | # Check if chromedriver exists at the expected path 40 | if not os.path.exists(chromedriver_path): 41 | print(f" - ERROR: Chromedriver not found at expected path: {chromedriver_path}") 42 | log_to_file(f"Selenium Skip: Chromedriver not found at {chromedriver_path}") 43 | return [] # Return empty list if chromedriver is not found 44 | 45 | try: 46 | options = webdriver.ChromeOptions() 47 | options.add_argument('--headless'); options.add_argument('--no-sandbox'); options.add_argument('--disable-dev-shm-usage') 48 | options.add_argument(f'user-agent={random.choice(USER_AGENTS)}') 49 | 50 | # Use ChromeService to specify the executable path 51 | service = ChromeService(executable_path=chromedriver_path) 52 | driver = webdriver.Chrome(service=service, options=options) 53 | 54 | wait = WebDriverWait(driver, 20) # Consider making timeout configurable 55 | print(" - Selenium WebDriver initialized using venv chromedriver.") 56 | 57 | # --- 
Perform Search for Each Keyword Query --- 58 | for query_idx, search_query in enumerate(search_queries): 59 | print(f" - Searching subreddit '{subreddit_name}' for query {query_idx+1}/{len(search_queries)}: '{search_query}'") 60 | try: 61 | encoded_query = urllib.parse.quote_plus(search_query) 62 | # Using old.reddit.com for potentially simpler structure 63 | search_url = f"https://old.reddit.com/r/{subreddit_name}/search?q={encoded_query}&restrict_sr=on&sort=relevance&t=all" 64 | print(f" - Navigating to search URL: {search_url}") 65 | driver.get(search_url) 66 | time.sleep(random.uniform(2, 4)) # Allow page to load 67 | 68 | print(" - Waiting for search results...") 69 | wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.search-result-link, div.search-result"))) # General result container 70 | link_elements = driver.find_elements(By.CSS_SELECTOR, "a.search-title") # Titles usually link to posts 71 | print(f" - Found {len(link_elements)} potential result links for this query.") 72 | 73 | count = 0 74 | for link_element in link_elements: 75 | href = link_element.get_attribute('href') 76 | # Ensure it's a comments link and not already seen 77 | if href and '/comments/' in href and href not in all_post_links_for_subreddit: 78 | all_post_links_for_subreddit.add(href) 79 | count += 1 80 | print(f" - Added {count} new unique post links.") 81 | 82 | except TimeoutException: 83 | print(f" - Timeout waiting for search results for query: '{search_query}'") 84 | log_to_file(f"Selenium Timeout waiting for search results: r/{subreddit_name}, Query: '{search_query}'") 85 | except Exception as search_e: 86 | print(f" - Error extracting search results for query '{search_query}': {search_e}") 87 | log_to_file(f"Selenium Error extracting search results: r/{subreddit_name}, Query: '{search_query}': {search_e}") 88 | 89 | time.sleep(random.uniform(1, 2)) # Delay between searches 90 | 91 | # --- Scrape Collected Post Links --- 92 | unique_post_links = list(all_post_links_for_subreddit) 93 | print(f" - Total unique post links found across all queries for '{subreddit_name}': {len(unique_post_links)}") 94 | links_to_scrape = unique_post_links[:source_scrape_limit] # Apply limit on *posts* to scrape 95 | print(f" - Scraping top {len(links_to_scrape)} posts based on --max-reddit-results={source_scrape_limit}") 96 | 97 | if not links_to_scrape: 98 | print(" - No post links found to scrape for this subreddit.") 99 | 100 | for post_url in links_to_scrape: 101 | if post_url in seen_urls_global: 102 | print(f" - Skipping already scraped URL (globally): {post_url}") 103 | continue 104 | # Check limit *for this source* again (safe redundancy) 105 | if source_texts_count >= source_scrape_limit: 106 | print(f" - Reached post scrape limit ({source_scrape_limit}) for subreddit {subreddit_name}.") 107 | break # Stop scraping more posts for this subreddit 108 | 109 | print(f" - Navigating to post: {post_url}") 110 | try: 111 | driver.get(post_url) 112 | time.sleep(random.uniform(2, 4)) # Allow comments to load 113 | 114 | post_title = "N/A"; post_body = ""; comment_texts = [] 115 | # Extract Title (using old.reddit selector) 116 | try: 117 | title_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "p.title a.title"))) 118 | post_title = title_element.text.strip() 119 | except (TimeoutException, NoSuchElementException): print(" - Warning: Could not find post title.") 120 | 121 | # Extract Body (using old.reddit selector) 122 | try: 123 | body_elements = driver.find_elements(By.CSS_SELECTOR, 
"div.expando div.md") 124 | if body_elements: post_body = body_elements[0].text.strip() 125 | except NoSuchElementException: pass 126 | except Exception as body_e: print(f" - Warning: Error extracting post body: {body_e}") 127 | 128 | # Extract Comments (using old.reddit selector) 129 | try: 130 | comment_elements = driver.find_elements(By.CSS_SELECTOR, "div.commentarea .comment .md p") 131 | print(f" - Found {len(comment_elements)} comment paragraphs. Scraping top {args.max_reddit_comments}.") 132 | for comment_element in comment_elements[:args.max_reddit_comments]: # Use args limit here 133 | comment_text = comment_element.text.strip() 134 | if comment_text: # Avoid empty paragraphs 135 | comment_texts.append(comment_text) 136 | except NoSuchElementException: pass 137 | except Exception as comment_e: print(f" - Warning: Error extracting comments: {comment_e}") 138 | 139 | # Combine content 140 | # Extract permalink from post_url for logging/reference 141 | permalink = post_url # Use post_url as permalink for old reddit 142 | full_content = f"Source: Reddit (r/{subreddit_name})\nPermalink: {permalink}\nTitle: {post_title}\n\nBody:\n{post_body}\n\nComments:\n" + "\n---\n".join(comment_texts) 143 | content_length = len(full_content) 144 | min_length = 150 # Minimum chars to be considered valid content 145 | 146 | if content_length > min_length: 147 | reddit_texts.append(full_content.strip()) # Add to this source's list 148 | seen_urls_global.add(post_url) # Mark as scraped globally 149 | source_texts_count += 1 # Increment count for this source 150 | print(f" - Success: Scraped content from post ({content_length} chars).") 151 | log_to_file(f"Selenium scrape success: {post_url} ({content_length} chars)") 152 | else: 153 | print(f" - Warning: Scraped content ({content_length} chars) seems too short (min {min_length}). Skipping post.") 154 | log_to_file(f"Selenium scrape warning (too short): {post_url} ({content_length} chars)") 155 | 156 | except TimeoutException: 157 | print(f" - Timeout loading post page: {post_url}") 158 | log_to_file(f"Selenium Timeout loading post page: {post_url}") 159 | except Exception as post_e: 160 | print(f" - Error processing post page {post_url}: {post_e}") 161 | log_to_file(f"Selenium Error processing post page {post_url}: {post_e}") 162 | finally: 163 | time.sleep(random.uniform(1.5, 3)) # Delay between posts 164 | 165 | except Exception as selenium_e: 166 | print(f" - An error occurred during Selenium processing for r/{subreddit_name}: {selenium_e}") 167 | log_to_file(f"Selenium Error processing source r/{subreddit_name}: {selenium_e}") 168 | finally: 169 | if driver: 170 | print(" - Quitting Selenium WebDriver.") 171 | driver.quit() 172 | 173 | print(f" - Finished processing Reddit source r/{subreddit_name}. Scraped {source_texts_count} piece(s).") 174 | return reddit_texts -------------------------------------------------------------------------------- /templates/script_builder_form.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | AI Podcast Generator 7 | 8 | 9 | 10 |
11 |

AI Podcast Generator

12 | 13 | 16 | 17 | 18 | 19 |
20 |
21 | 22 | 23 |
24 | 25 |
26 | 27 | 28 |
29 | 30 |
31 | 32 | 33 |
34 | 35 |
36 | 37 | 41 |
42 | 43 |
44 | 45 | 49 |
50 |
51 | 52 | 53 |
54 | 55 |
56 | Additional Options 57 | 58 |
59 | 60 | 61 |
62 | 63 |
64 | 65 | 66 |
67 | 68 |
69 | 70 | 71 |
72 | 73 |
74 | 75 | 76 |
77 | 78 |
79 | 80 | 81 |
82 | 83 |
84 | 85 | 86 |
87 | 88 |
89 | 90 | 91 |
92 | 93 |
94 | 95 | 96 |
97 | 98 |
99 | 100 | 101 |
102 | 103 |
104 | 105 | 106 |
107 | 108 |
109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 |
117 | 118 |
119 | 120 | 121 |
122 | 123 |
124 | 125 | 126 |
127 | 128 |
129 | 130 | 131 |
132 | 133 |
134 | 135 | 136 |
137 | 138 |
139 | 140 | 141 |
142 | 143 |
144 | 145 | 146 |
147 |
148 | 149 | 150 |
151 | 152 |
Drag and drop files here
153 |
    154 | 155 |
    156 | 157 |
    158 | 159 |
    Drag and drop a .txt file here
    160 |

    161 | 162 |
    163 | 164 | 165 | 166 | 181 |
    182 |
    183 | 184 | 185 | 186 | 187 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /functions/tts/gui/player.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk, messagebox 3 | import threading 4 | import time 5 | import os 6 | import soundfile as sf 7 | import pygame 8 | 9 | # Constants (copied from orpheus_tts.py, consider centralizing if used elsewhere) 10 | # SCRIPT_DIR = os.path.dirname(__file__) # This would be functions/tts/gui 11 | # IMAGE_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", "settings/images")) 12 | # MUSIC_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", "settings/music")) 13 | # VOICE_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", "settings/voices")) 14 | 15 | # Initialize pygame mixer (should ideally be done once at application start) 16 | try: 17 | pygame.mixer.init() 18 | if not pygame.mixer.get_init(): 19 | print("Warning: Pygame mixer failed to initialize.") 20 | pygame = None # Treat as if pygame is not available 21 | except ImportError: 22 | print("Warning: 'pygame' library not found. 
pip install pygame") 23 | pygame = None 24 | 25 | class AudioPlayer(ttk.Frame): 26 | def __init__(self, parent, redo_command=None, waveform_ax=None, waveform_canvas_agg=None): 27 | super().__init__(parent) 28 | self.redo_command = redo_command 29 | self.waveform_ax = waveform_ax 30 | self.waveform_canvas_agg = waveform_canvas_agg 31 | self.progress_line = None 32 | 33 | self.current_file = None 34 | self.is_playing = False 35 | self.current_pos = 0 36 | 37 | self.controls_frame = ttk.Frame(self) 38 | self.controls_frame.pack(side=tk.TOP, fill=tk.X, padx=5, pady=(5, 2)) 39 | 40 | self.play_btn = ttk.Button(self.controls_frame, text="Play", width=5, command=self.toggle_play, state=tk.DISABLED) 41 | self.play_btn.pack(side=tk.LEFT, padx=2) 42 | 43 | self.stop_btn = ttk.Button(self.controls_frame, text="Stop", width=5, command=self.stop, state=tk.DISABLED) 44 | self.stop_btn.pack(side=tk.LEFT, padx=2) 45 | 46 | self.redo_btn = ttk.Button(self.controls_frame, text="Redo", width=5, command=self.redo_command, state=tk.DISABLED) 47 | self.redo_btn.pack(side=tk.LEFT, padx=2) 48 | 49 | self.progress_frame = ttk.Frame(self) 50 | self.progress_frame.pack(side=tk.TOP, fill=tk.X, padx=5, pady=(0, 5)) 51 | 52 | self.time_var = tk.StringVar(value="00:00 / 00:00") 53 | self.time_label = ttk.Label(self.progress_frame, textvariable=self.time_var) 54 | self.time_label.pack(side=tk.RIGHT, padx=5) 55 | 56 | self.update_thread = None 57 | 58 | def load_file(self, filepath): 59 | self.stop() 60 | self.current_file = None 61 | self.time_var.set("00:00 / 00:00") 62 | self.play_btn.configure(state=tk.DISABLED) 63 | self.stop_btn.configure(state=tk.DISABLED) 64 | self._update_progress_line(0) 65 | 66 | if not pygame: 67 | print("AudioPlayer: Pygame not available.") 68 | return False 69 | 70 | if not filepath: 71 | print("AudioPlayer: No file path provided.") 72 | return False 73 | 74 | print(f"AudioPlayer: Attempting to load audio file: {filepath}") 75 | if not os.path.exists(filepath): 76 | print(f"AudioPlayer: Error - File does not exist: {filepath}") 77 | return False 78 | 79 | try: 80 | info = sf.info(filepath) 81 | duration = info.frames / info.samplerate 82 | print(f"AudioPlayer: Duration calculated via soundfile: {duration:.2f}s") 83 | 84 | pygame.mixer.music.load(filepath) 85 | print(f"AudioPlayer: Pygame loaded: {filepath}") 86 | 87 | self.current_file = filepath 88 | self.duration = duration 89 | self.update_time_label(0, duration) 90 | self.play_btn.configure(state=tk.NORMAL) 91 | self.stop_btn.configure(state=tk.NORMAL) 92 | self._update_progress_line(0) 93 | return True 94 | 95 | except Exception as e: 96 | print(f"AudioPlayer: Error loading audio file {filepath}: {e}") 97 | self.current_file = None 98 | return False 99 | 100 | def toggle_play(self): 101 | if not self.current_file or not pygame or not pygame.mixer.get_init(): 102 | return 103 | 104 | if self.is_playing: 105 | try: 106 | pygame.mixer.music.pause() 107 | self.play_btn.configure(text="Play") 108 | self.is_playing = False 109 | except Exception as e: 110 | print(f"AudioPlayer: Error pausing music: {e}") 111 | else: 112 | try: 113 | if not pygame.mixer.music.get_busy(): 114 | print("AudioPlayer: Music not busy, reloading and playing from start/seek pos.") 115 | pygame.mixer.music.load(self.current_file) 116 | pygame.mixer.music.play(start=self.current_pos) 117 | else: 118 | pygame.mixer.music.unpause() 119 | 120 | self.play_btn.configure(text="Pause") 121 | self.is_playing = True 122 | 123 | if not self.update_thread or not 
self.update_thread.is_alive(): 124 | self.update_thread = threading.Thread(target=self.update_progress, daemon=True) 125 | self.update_thread.start() 126 | except Exception as e: 127 | messagebox.showerror("Playback Error", f"Error playing audio: {str(e)}") 128 | print(f"AudioPlayer: Error playing/unpausing music: {e}") 129 | self.is_playing = False 130 | self.play_btn.configure(text="Play") 131 | return 132 | 133 | def stop(self): 134 | if pygame and pygame.mixer.get_init(): 135 | try: 136 | pygame.mixer.music.stop() 137 | pygame.mixer.music.unload() 138 | except Exception as e: 139 | print(f"AudioPlayer: Error stopping/unloading music: {e}") 140 | 141 | self.is_playing = False 142 | self.current_pos = 0 143 | self.play_btn.configure(text="Play") 144 | if self.current_file: 145 | self.play_btn.configure(state=tk.NORMAL) 146 | else: 147 | self.play_btn.configure(state=tk.DISABLED) 148 | self.update_time_label(0, getattr(self, 'duration', 0)) 149 | self._update_progress_line(0) 150 | if self.current_file: 151 | self.stop_btn.configure(state=tk.NORMAL) 152 | else: 153 | self.stop_btn.configure(state=tk.DISABLED) 154 | 155 | def seek_to_time(self, target_time): 156 | """Seeks playback to the specified time (in seconds).""" 157 | if not self.current_file or not pygame or not pygame.mixer.get_init(): 158 | return 159 | if target_time < 0 or target_time > self.duration: 160 | print(f"AudioPlayer: Invalid seek time: {target_time:.2f}s") 161 | return 162 | 163 | print(f"AudioPlayer: Seek requested to {target_time:.2f}s") 164 | was_playing = self.is_playing 165 | self.is_playing = False 166 | 167 | try: 168 | pygame.mixer.music.stop() 169 | pygame.mixer.music.load(self.current_file) 170 | pygame.mixer.music.play(start=target_time) 171 | self.current_pos = target_time 172 | self.is_playing = True 173 | self.play_btn.configure(text="Pause") 174 | self._update_progress_line(target_time) 175 | 176 | if not self.update_thread or not self.update_thread.is_alive(): 177 | print("AudioPlayer: Restarting update thread after seek.") 178 | self.update_thread = threading.Thread(target=self.update_progress, daemon=True) 179 | self.update_thread.start() 180 | 181 | except Exception as e: 182 | print(f"AudioPlayer: Error seeking and playing: {e}") 183 | messagebox.showerror("Seek Error", f"Error seeking audio: {e}") 184 | self.stop() 185 | 186 | def update_progress(self): 187 | print("AudioPlayer: Starting update_progress loop.") 188 | while pygame and pygame.mixer.get_init() and self.current_file: 189 | if not self.is_playing: 190 | print("AudioPlayer: is_playing is False, breaking loop.") 191 | break 192 | try: 193 | current_playback_time = pygame.mixer.music.get_pos() / 1000.0 194 | display_pos = self.current_pos + current_playback_time 195 | 196 | if current_playback_time < 0: 197 | if not pygame.mixer.music.get_busy(): 198 | print("AudioPlayer: Playback finished (get_busy is False).") 199 | self.is_playing = False 200 | self.play_btn.configure(text="Play") 201 | self.current_pos = 0 202 | print("AudioPlayer: Exiting loop after playback finished.") 203 | break 204 | else: 205 | time.sleep(0.1) 206 | continue 207 | 208 | if display_pos >= self.duration: 209 | display_pos = self.duration 210 | if not pygame.mixer.music.get_busy(): 211 | self.is_playing = False 212 | self.play_btn.configure(text="Play") 213 | self.current_pos = 0 214 | self._update_progress_line(0) 215 | self.update_time_label(0, self.duration) 216 | print("AudioPlayer: Exiting loop after playback finished naturally.") 217 | break 218 | 219 | 
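# Push the freshly computed playback position (current_pos offset plus
# pygame.mixer.music.get_pos()/1000) to the waveform cursor and the time label.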
self._update_progress_line(display_pos) 220 | self.update_time_label(display_pos, self.duration) 221 | 222 | except Exception as e: 223 | if isinstance(e, pygame.error) and "mixer not initialized" in str(e): 224 | print("AudioPlayer: Mixer became uninitialized during update.") 225 | self.is_playing = False 226 | break 227 | print(f"AudioPlayer: Error in update_progress: {type(e).__name__} - {e}") 228 | self.is_playing = False 229 | break 230 | 231 | time.sleep(0.1) 232 | print("AudioPlayer: Exited update_progress loop.") 233 | 234 | def update_time_label(self, current, total): 235 | try: 236 | total = max(0, total) 237 | current = max(0, min(current, total)) 238 | current_str = time.strftime("%M:%S", time.gmtime(current)) 239 | total_str = time.strftime("%M:%S", time.gmtime(total)) 240 | self.time_var.set(f"{current_str} / {total_str}") 241 | except ValueError as e: 242 | print(f"AudioPlayer: ValueError updating time label: {e}. Current: {current}, Total: {total}") 243 | self.time_var.set("--:-- / --:--") 244 | 245 | def _update_progress_line(self, time_pos): 246 | """Updates the vertical progress line on the waveform plot.""" 247 | if self.waveform_ax and self.waveform_canvas_agg: 248 | try: 249 | xlim = self.waveform_ax.get_xlim() 250 | time_pos = max(xlim[0], min(time_pos, xlim[1])) 251 | except Exception: 252 | pass 253 | 254 | if self.progress_line is None: 255 | if self.waveform_ax.lines: 256 | self.progress_line = self.waveform_ax.axvline(time_pos, color='r', linestyle='--', linewidth=1, label='_nolegend_') 257 | else: 258 | self.progress_line = None 259 | return 260 | elif self.progress_line in self.waveform_ax.lines: 261 | self.progress_line.set_xdata([time_pos, time_pos]) 262 | else: 263 | self.progress_line = self.waveform_ax.axvline(time_pos, color='r', linestyle='--', linewidth=1, label='_nolegend_') 264 | 265 | try: 266 | self.waveform_canvas_agg.draw_idle() 267 | except Exception as e: 268 | print(f"AudioPlayer: Error drawing progress line: {e}") 269 | 270 | def cleanup(self): 271 | """Stop playback and cleanup resources""" 272 | print("AudioPlayer: Cleanup called.") 273 | self.is_playing = False 274 | self.stop() -------------------------------------------------------------------------------- /static/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | line-height: 1.6; 4 | margin: 0; 5 | padding: 20px; 6 | background-color: #f4f4f4; 7 | color: #333; 8 | } 9 | 10 | .container { 11 | max-width: 960px; 12 | margin: 0 auto; 13 | background: #fff; 14 | padding: 25px 40px; 15 | border-radius: 10px; 16 | box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); 17 | } 18 | 19 | h1, h2 { 20 | color: #0056b3; 21 | border-bottom: 2px solid #eee; 22 | padding-bottom: 10px; 23 | margin-bottom: 20px; 24 | } 25 | 26 | .form-group { 27 | margin-bottom: 20px; 28 | } 29 | 30 | .form-group label { 31 | display: block; 32 | margin-bottom: 5px; 33 | font-weight: bold; 34 | } 35 | 36 | .form-group input[type="text"], 37 | .form-group input[type="number"], 38 | .form-group input[type="date"], 39 | .form-group select, 40 | .form-group textarea { 41 | width: 100%; 42 | padding: 10px; /* Increased padding */ 43 | border: 1px solid #ccc; 44 | border-radius: 5px; /* Slightly more rounded corners */ 45 | box-sizing: border-box; 46 | transition: border-color 0.3s, box-shadow 0.3s; /* Added transition */ 47 | } 48 | 49 | .form-group input[type="text"]:focus, 50 | .form-group input[type="number"]:focus, 51 | .form-group 
input[type="date"]:focus, 52 | .form-group select:focus, 53 | .form-group textarea:focus { 54 | border-color: #007bff; 55 | box-shadow: 0 0 5px rgba(0, 123, 255, 0.5); 56 | outline: none; 57 | } 58 | 59 | .checkbox-group { 60 | display: flex; 61 | align-items: center; 62 | margin-bottom: 10px; 63 | } 64 | 65 | .checkbox-group input[type="checkbox"] { 66 | margin-right: 10px; 67 | } 68 | 69 | /* Styles for the main dashboard menu grid */ 70 | .menu-grid { 71 | display: grid; 72 | grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); 73 | gap: 20px; 74 | margin-top: 30px; 75 | } 76 | 77 | .menu-item { 78 | background-color: #e9f5ff; 79 | border: 1px solid #b3e0ff; 80 | border-radius: 8px; 81 | padding: 20px; 82 | text-align: center; 83 | text-decoration: none; 84 | color: #0056b3; 85 | transition: background-color 0.3s ease, transform 0.3s ease; 86 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); 87 | } 88 | 89 | .menu-item:hover { 90 | background-color: #cce7ff; 91 | transform: translateY(-5px); 92 | } 93 | 94 | .menu-item h2 { 95 | margin-top: 0; 96 | margin-bottom: 10px; 97 | color: #0056b3; 98 | border-bottom: none; 99 | padding-bottom: 0; 100 | } 101 | 102 | .menu-item p { 103 | font-size: 0.9em; 104 | color: #333; 105 | } 106 | 107 | /* Style for the collapsible button (from podcast_gui.css) */ 108 | .collapsible { 109 | background-color: #f1f1f1; 110 | color: #444; 111 | cursor: pointer; 112 | padding: 18px; 113 | width: 100%; 114 | border: none; 115 | text-align: left; 116 | outline: none; 117 | font-size: 15px; 118 | transition: 0.4s; 119 | margin-top: 20px; 120 | border-radius: 4px; 121 | } 122 | 123 | .active, .collapsible:hover { 124 | background-color: #ccc; 125 | } 126 | 127 | /* Style for the collapsible content (from podcast_gui.css) */ 128 | .content { 129 | padding: 0 18px; 130 | background-color: white; 131 | max-height: 0; 132 | overflow: hidden; 133 | transition: max-height 0.2s ease-out; 134 | border: 1px solid #eee; 135 | border-top: none; 136 | border-radius: 0 0 4px 4px; 137 | margin-bottom: 20px; 138 | } 139 | 140 | .content h3 { 141 | color: #0056b3; 142 | border-bottom: 1px solid #eee; 143 | padding-bottom: 5px; 144 | margin-top: 20px; 145 | margin-bottom: 15px; 146 | } 147 | 148 | /* Drop area styles (from style.css) */ 149 | .drop-area { 150 | border: 2px dashed #ccc; 151 | border-radius: 4px; 152 | padding: 20px; 153 | text-align: center; 154 | cursor: pointer; 155 | margin-top: 10px; 156 | background-color: #f9f9f9; 157 | } 158 | 159 | .drop-area.highlight { 160 | border-color: #007bff; 161 | background-color: #e9e9e9; 162 | } 163 | 164 | #reference-docs-list { 165 | list-style: none; 166 | padding: 0; 167 | margin-top: 10px; 168 | } 169 | 170 | #reference-docs-list li { 171 | background-color: #e9e9e9; 172 | padding: 8px; 173 | margin-bottom: 5px; 174 | border-radius: 4px; 175 | display: flex; 176 | justify-content: space-between; 177 | align-items: center; 178 | } 179 | 180 | #reference-docs-list li .remove-file { 181 | color: #dc3545; 182 | cursor: pointer; 183 | font-weight: bold; 184 | } 185 | 186 | button, .button { 187 | background-color: #007bff; 188 | color: white; 189 | padding: 12px 25px; 190 | border: none; 191 | border-radius: 5px; 192 | cursor: pointer; 193 | font-size: 16px; 194 | margin-top: 10px; 195 | transition: background-color 0.3s, transform 0.2s; 196 | text-transform: uppercase; 197 | letter-spacing: 1px; 198 | font-weight: bold; 199 | } 200 | 201 | button:hover, .button:hover { 202 | background-color: #0056b3; 203 | transform: 
204 | }
205 |
206 | details {
207 |     border: 1px solid #eee;
208 |     border-radius: 5px;
209 |     margin-bottom: 20px;
210 |     background-color: #f9f9f9;
211 | }
212 |
213 | summary {
214 |     padding: 15px;
215 |     font-weight: bold;
216 |     cursor: pointer;
217 |     outline: none;
218 |     color: #0056b3;
219 | }
220 |
221 | details[open] {
222 |     background-color: #fff;
223 | }
224 |
225 | details[open] summary {
226 |     border-bottom: 1px solid #eee;
227 | }
228 |
229 | details .form-group {
230 |     padding: 0 15px;
231 | }
232 |
233 | details .form-group:first-of-type {
234 |     padding-top: 15px;
235 | }
236 |
237 | details .form-group:last-of-type {
238 |     padding-bottom: 15px;
239 |     margin-bottom: 0;
240 | }
241 |
242 | /* Output console styles */
243 | #output pre,
244 | #script-console-output,
245 | #podcast-console-output {
246 |     background-color: #333;
247 |     color: #f4f4f4;
248 |     padding: 15px;
249 |     border-radius: 4px;
250 |     overflow-x: auto;
251 |     white-space: pre-wrap;
252 |     word-wrap: break-word;
253 |     max-height: 300px; /* Fixed height */
254 |     overflow-y: auto; /* Scrollable */
255 |     text-align: left;
256 |     margin-top: 20px;
257 | }
258 |
259 | #results {
260 |     margin-top: 20px;
261 |     padding: 15px;
262 |     background-color: #d4edda;
263 |     color: #155724;
264 |     border: 1px solid #c3e6cb;
265 |     border-radius: 4px;
266 | }
267 |
268 | #output-links a, #report-links a { /* Combined from both CSS files */
269 |     display: block; /* Changed from inline-block for better history display */
270 |     margin-top: 10px;
271 |     color: #004085;
272 |     text-decoration: none;
273 |     font-weight: bold;
274 | }
275 |
276 | #output-links a:hover, #report-links a:hover {
277 |     text-decoration: underline;
278 | }
279 |
280 | nav a {
281 |     margin-right: 15px;
282 |     text-decoration: none;
283 |     color: #007bff;
284 | }
285 |
286 | nav a:hover {
287 |     text-decoration: underline;
288 | }
289 |
290 | .settings-section {
291 |     margin-bottom: 30px;
292 |     padding-bottom: 20px;
293 |     border-bottom: 1px dashed #ccc;
294 | }
295 |
296 | .settings-section:last-child {
297 |     border-bottom: none;
298 | }
299 |
300 | #llm-model-details {
301 |     border: 1px solid #eee;
302 |     padding: 15px;
303 |     margin-top: 15px;
304 |     border-radius: 4px;
305 |     background-color: #f9f9f9;
306 | }
307 |
308 | #llm-model-details .form-group input[readonly] {
309 |     background-color: #e9e9e9;
310 | }
311 |
312 | #delete-llm-model {
313 |     background-color: #dc3545;
314 | }
315 |
316 | #delete-llm-model:hover {
317 |     background-color: #c82333;
318 | }
319 |
320 | /* Modal Overlay (from podcast_gui.css) */
321 | .modal-overlay {
322 |     position: fixed;
323 |     top: 0;
324 |     left: 0;
325 |     width: 100%;
326 |     height: 100%;
327 |     background-color: rgba(0, 0, 0, 0.7);
328 |     display: flex;
329 |     justify-content: center;
330 |     align-items: center;
331 |     z-index: 1000;
332 | }
333 |
334 | .modal-content {
335 |     background-color: #fff;
336 |     padding: 30px;
337 |     border-radius: 8px;
338 |     box-shadow: 0 0 20px rgba(0, 0, 0, 0.3);
339 |     text-align: center;
340 |     max-width: 500px;
341 |     width: 90%;
342 | }
343 |
344 | .modal-content h2 {
345 |     color: #0056b3;
346 |     margin-top: 0;
347 | }
348 |
349 | .modal-content p {
350 |     font-size: 1.1em;
351 |     margin-bottom: 20px;
352 | }
353 |
354 | /* Spinner animation (combined and adjusted for size) */
355 | .spinner {
356 |     border: 4px solid rgba(0, 123, 255, 0.3);
357 |     border-radius: 50%;
358 |     border-top: 4px solid #007bff;
359 |     width: 40px; /* Larger spinner from podcast_gui.css */
360 |     height: 40px; /* Larger spinner from podcast_gui.css */
361 |     animation: spin 1s linear infinite;
362 |     margin: 20px auto; /* Center the spinner from podcast_gui.css */
363 |     display: block; /* Ensure it takes its own line */
364 | }
365 |
366 | @keyframes spin {
367 |     0% { transform: rotate(0deg); }
368 |     100% { transform: rotate(360deg); }
369 | }
370 |
371 | /* Styles for history page output list */
372 | .output-list {
373 |     margin-top: 20px;
374 | }
375 |
376 | .output-item {
377 |     background-color: #f9f9f9;
378 |     border: 1px solid #eee;
379 |     border-radius: 8px;
380 |     padding: 15px;
381 |     margin-bottom: 15px;
382 |     box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
383 | }
384 |
385 | .output-item h3 {
386 |     margin-top: 0;
387 |     margin-bottom: 10px;
388 |     color: #0056b3;
389 |     border-bottom: 1px solid #eee;
390 |     padding-bottom: 5px;
391 | }
392 |
393 | .output-item p {
394 |     margin-bottom: 5px;
395 |     font-size: 0.95em;
396 | }
397 |
398 | .output-item .button {
399 |     display: inline-block;
400 |     margin-right: 10px;
401 |     margin-top: 10px;
402 |     padding: 8px 15px;
403 |     background-color: #007bff;
404 |     color: white;
405 |     text-decoration: none;
406 |     border-radius: 4px;
407 |     font-size: 0.9em;
408 | }
409 |
410 | .output-item .button:hover {
411 |     background-color: #0056b3;
412 | }
413 |
414 | .back-button {
415 |     display: inline-block;
416 |     margin-bottom: 20px;
417 |     padding: 10px 15px;
418 |     background-color: #6c757d;
419 |     color: white;
420 |     text-decoration: none;
421 |     border-radius: 4px;
422 |     font-size: 0.9em;
423 | }
424 |
425 | .back-button:hover {
426 |     background-color: #5a6268;
427 | }
428 |
429 | /* Docker Status Widget Styles */
430 | .docker-status-widget {
431 |     background-color: #f8f9fa;
432 |     border: 1px solid #dee2e6;
433 |     border-radius: 8px;
434 |     padding: 20px;
435 |     margin-bottom: 30px;
436 |     box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
437 | }
438 |
439 | .docker-status-widget h3 {
440 |     margin-top: 0;
441 |     margin-bottom: 15px;
442 |     color: #495057;
443 |     border-bottom: 1px solid #dee2e6;
444 |     padding-bottom: 10px;
445 | }
446 |
447 | .status-indicator {
448 |     display: flex;
449 |     align-items: center;
450 |     margin-bottom: 15px;
451 | }
452 |
453 | .status-dot {
454 |     width: 12px;
455 |     height: 12px;
456 |     border-radius: 50%;
457 |     margin-right: 10px;
458 |     display: inline-block;
459 | }
460 |
461 | .status-dot.status-running {
462 |     background-color: #28a745;
463 |     box-shadow: 0 0 5px rgba(40, 167, 69, 0.5);
464 | }
465 |
466 | .status-dot.status-stopped {
467 |     background-color: #ffc107;
468 | }
469 |
470 | .status-dot.status-building {
471 |     background-color: #17a2b8;
472 |     animation: pulse 2s infinite;
473 | }
474 |
475 | .status-dot.status-error {
476 |     background-color: #dc3545;
477 | }
478 |
479 | @keyframes pulse {
480 |     0% { opacity: 1; }
481 |     50% { opacity: 0.5; }
482 |     100% { opacity: 1; }
483 | }
484 |
485 | .status-text {
486 |     font-weight: bold;
487 |     color: #495057;
488 | }
489 |
490 | .docker-controls {
491 |     margin-bottom: 15px;
492 | }
493 |
494 | .docker-btn {
495 |     display: inline-block;
496 |     padding: 8px 16px;
497 |     margin-right: 10px;
498 |     border: none;
499 |     border-radius: 4px;
500 |     text-decoration: none;
501 |     font-size: 14px;
502 |     cursor: pointer;
503 |     transition: background-color 0.3s ease;
504 | }
505 |
506 | .docker-btn.start-btn {
507 |     background-color: #28a745;
508 |     color: white;
509 | }
510 |
511 | .docker-btn.start-btn:hover {
512 |     background-color: #218838;
513 | }
514 |
515 | .docker-btn.stop-btn {
516 |     background-color: #dc3545;
517 |     color: white;
518 | }
519 |
520 | .docker-btn.stop-btn:hover {
521 |     background-color: #c82333;
522 | }
523 |
524 | .docker-btn.ui-btn {
525 |     background-color: #007bff;
526 |     color: white;
527 | }
528 |
529 | .docker-btn.ui-btn:hover {
530 |     background-color: #0056b3;
531 | }
532 |
533 | .docker-btn:disabled {
534 |     opacity: 0.6;
535 |     cursor: not-allowed;
536 | }
537 |
538 | .docker-message {
539 |     font-size: 14px;
540 |     color: #6c757d;
541 |     font-style: italic;
542 | }
543 |
544 | /* Easy Mode Button */
545 | .easy-mode-button {
546 |     background-color: #28a745; /* Green color */
547 |     color: white;
548 |     padding: 12px 20px;
549 |     border: none;
550 |     border-radius: 5px;
551 |     cursor: pointer;
552 |     font-size: 18px;
553 |     width: 100%;
554 |     box-sizing: border-box;
555 |     margin-bottom: 20px; /* Space below the button */
556 |     display: block; /* Ensure it takes full width */
557 | }
558 |
559 | .easy-mode-button:hover {
560 |     background-color: #218838; /* Darker green on hover */
561 | }
562 |
563 | /* Modal Styles */
564 | .modal {
565 |     display: none; /* Hidden by default */
566 |     position: fixed; /* Stay in place */
567 |     z-index: 1000; /* Sit on top */
568 |     left: 0;
569 |     top: 0;
570 |     width: 100%; /* Full width */
571 |     height: 100%; /* Full height */
572 |     overflow: auto; /* Enable scroll if needed */
573 |     background-color: rgba(0,0,0,0.4); /* Black w/ opacity */
574 |     padding-top: 60px;
575 | }
576 |
577 | .modal-content {
578 |     background-color: #fefefe;
579 |     margin: 5% auto; /* 5% from the top and centered */
580 |     padding: 20px;
581 |     border: 1px solid #888;
582 |     border-radius: 8px;
583 |     width: 80%; /* Could be more or less, depending on screen size */
584 |     max-width: 600px;
585 |     position: relative;
586 |     box-shadow: 0 4px 8px rgba(0,0,0,0.2);
587 |     animation-name: animatetop;
588 |     animation-duration: 0.4s;
589 | }
590 |
591 | /* Add Animation */
592 | @keyframes animatetop {
593 |     from {top: -300px; opacity: 0}
594 |     to {top: 0; opacity: 1}
595 | }
596 |
597 | .close-button {
598 |     color: #aaa;
599 |     float: right;
600 |     font-size: 28px;
601 |     font-weight: bold;
602 | }
603 |
604 | .close-button:hover,
605 | .close-button:focus {
606 |     color: black;
607 |     text-decoration: none;
608 |     cursor: pointer;
609 | }
610 |
611 | #research-description {
612 |     width: 100%;
613 |     padding: 12px;
614 |     border: 1px solid #ccc;
615 |     border-radius: 5px;
616 |     box-sizing: border-box;
617 |     font-size: 16px;
618 |     line-height: 1.5;
619 |     margin-bottom: 15px;
620 |     resize: vertical;
621 |     box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
622 |     transition: border-color 0.3s, box-shadow 0.3s;
623 | }
624 |
625 | #research-description:focus {
626 |     border-color: #007bff;
627 |     box-shadow: 0 0 5px rgba(0, 123, 255, 0.5);
628 |     outline: none;
629 | }
630 |
631 | /* Resume Section Styles */
632 | .resume-section {
633 |     margin-top: 30px;
634 |     padding-top: 20px;
635 |     border-top: 2px solid #eee;
636 | }
637 |
638 | .resume-section h2 {
639 |     font-size: 1.2em;
640 |     color: #333;
641 |     border-bottom: none;
642 |     padding-bottom: 0;
643 |     margin-bottom: 15px;
644 | }
645 |
646 | #resume-container {
647 |     display: flex;
648 |     align-items: center;
649 |     gap: 10px;
650 | }
651 |
652 | #resume-container select {
653 |     flex-grow: 1; /* Allows the select to take up available space */
654 | }
655 |
656 | #load-podcast-button {
657 |     background-color: #28a745; /* Green color for load button */
658 |     flex-shrink: 0; /* Prevents the button from shrinking */
659 | }
660 |
661 | #load-podcast-button:hover {
662 |     background-color: #218838;
663 | }
664 |
--------------------------------------------------------------------------------