├── functions ├── tts │ ├── __init__.py │ ├── gui │ │ ├── __init__.py │ │ └── player.py │ ├── args.py │ ├── utils.py │ └── processing.py ├── scraping │ ├── web.py │ ├── documents.py │ └── reddit.py ├── search │ ├── google.py │ ├── brave.py │ ├── discovery.py │ └── api.py ├── utils.py ├── ai.py ├── config.py ├── args.py └── processing │ ├── summarization.py │ ├── youtube_descriptor.py │ └── report_generation.py ├── settings ├── music │ ├── intro │ │ └── Warrior_Intro.mp3 │ └── outro │ │ └── Warrior_Outro.mp3 ├── images │ ├── background │ │ └── Podcast_Background.png │ ├── guest │ │ ├── open │ │ │ ├── Reed_Gasp-removebg-preview.png │ │ │ ├── Reed_BigMouth-removebg-preview.png │ │ │ ├── Reed_Talking-removebg-preview.png │ │ │ └── Reed_Talkingv2-removebg-preview.png │ │ └── closed │ │ │ └── Reed_MouthClosed_Smiling-removebg-preview.png │ └── host │ │ ├── open │ │ ├── Dundell_Open-removebg-preview.png │ │ ├── Dundell_Surprised-removebg-preview.png │ │ └── Dundell_Talkingv2-removebg-preview.png │ │ └── closed │ │ └── Dundell_Mouth_Closedv2-removebg-preview.png ├── voices │ ├── leo.yaml │ ├── tara.yaml │ └── default.yaml ├── llm_settings │ └── example_ai_models.yml ├── env.example └── characters │ ├── host.yml │ └── guest.yml ├── research └── Example_Docs_Folder │ ├── Mabinogi F2P Reforges Guide_.docx │ ├── Weekly Dungeon Vouchers Guide-.docx │ ├── Fynni Gems Passive Income Guide_.docx │ ├── Mabinogi Farming Lord_Abyss Passes.docx │ └── Mabinogi Adventure Seals Weekly Guide_.docx ├── .gitignore ├── requirements_host.txt ├── run_control_panel.sh ├── templates ├── history.html ├── settings.html ├── main_dashboard.html ├── podcast_builder_form.html └── script_builder_form.html ├── Installer_Windows.bat ├── run_control_panel.bat ├── README.md ├── installation_readme.md ├── LICENSE └── static └── main.css /functions/tts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /functions/tts/gui/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /settings/music/intro/Warrior_Intro.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/music/intro/Warrior_Intro.mp3 -------------------------------------------------------------------------------- /settings/music/outro/Warrior_Outro.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/music/outro/Warrior_Outro.mp3 -------------------------------------------------------------------------------- /settings/images/background/Podcast_Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/background/Podcast_Background.png -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_Gasp-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_Gasp-removebg-preview.png -------------------------------------------------------------------------------- 
/settings/images/host/open/Dundell_Open-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/open/Dundell_Open-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_BigMouth-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_BigMouth-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_Talking-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_Talking-removebg-preview.png -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Mabinogi F2P Reforges Guide_.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Mabinogi F2P Reforges Guide_.docx -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Weekly Dungeon Vouchers Guide-.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Weekly Dungeon Vouchers Guide-.docx -------------------------------------------------------------------------------- /settings/images/guest/open/Reed_Talkingv2-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/open/Reed_Talkingv2-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/host/open/Dundell_Surprised-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/open/Dundell_Surprised-removebg-preview.png -------------------------------------------------------------------------------- /settings/images/host/open/Dundell_Talkingv2-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/open/Dundell_Talkingv2-removebg-preview.png -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Fynni Gems Passive Income Guide_.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Fynni Gems Passive Income Guide_.docx -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Mabinogi Farming Lord_Abyss Passes.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Mabinogi Farming Lord_Abyss Passes.docx 
-------------------------------------------------------------------------------- /settings/images/host/closed/Dundell_Mouth_Closedv2-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/host/closed/Dundell_Mouth_Closedv2-removebg-preview.png -------------------------------------------------------------------------------- /research/Example_Docs_Folder/Mabinogi Adventure Seals Weekly Guide_.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/research/Example_Docs_Folder/Mabinogi Adventure Seals Weekly Guide_.docx -------------------------------------------------------------------------------- /settings/images/guest/closed/Reed_MouthClosed_Smiling-removebg-preview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ETomberg391/Ecne-AI-Podcaster/HEAD/settings/images/guest/closed/Reed_MouthClosed_Smiling-removebg-preview.png -------------------------------------------------------------------------------- /settings/voices/leo.yaml: -------------------------------------------------------------------------------- 1 | # settings/voices/leo.yaml 2 | gain_factor: 1.0 3 | trim_end_ms: 100 4 | nr_level: 0 5 | compress_thresh: 0.001 6 | compress_ratio: 1 7 | norm_frame_len: 10 8 | norm_gauss_size: 3 9 | deesser_freq: 5000 -------------------------------------------------------------------------------- /settings/voices/tara.yaml: -------------------------------------------------------------------------------- 1 | # settings/voices/tara.yaml 2 | gain_factor: 1 3 | trim_end_ms: 120 4 | nr_level: 35 5 | compress_thresh: 0.03 6 | compress_ratio: 2 7 | norm_frame_len: 20 8 | norm_gauss_size: 15 9 | deesser_freq: 5000 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | settings/llm_settings/ai_models.yml 3 | /orpheus_tts_setup/* 4 | functions/__pycache__/* 5 | */*/__pycache__/* 6 | /outputs/* 7 | /research/* 8 | /uploads/* 9 | */*/__pycache__/* 10 | */*/*/__pycache__/* 11 | temp_audio/* 12 | 13 | -------------------------------------------------------------------------------- /requirements_host.txt: -------------------------------------------------------------------------------- 1 | requests 2 | PyYAML 3 | python-dotenv 4 | beautifulsoup4 5 | newspaper4k 6 | PyPDF2 7 | python-docx 8 | selenium 9 | soundfile 10 | numpy 11 | Pillow 12 | nltk 13 | pydub 14 | matplotlib 15 | scipy 16 | pygame 17 | lxml_html_clean 18 | flask 19 | moviepy==1.0.3 20 | # audioop-lts not available for Python 3.12 on Windows - using built-in audioop for now -------------------------------------------------------------------------------- /settings/voices/default.yaml: -------------------------------------------------------------------------------- 1 | # settings/voices/default.yaml 2 | gain_factor: 1.0 3 | trim_end_ms: 0 4 | nr_level: 0 5 | compress_thresh: 1.0 # Threshold effectively disables compression (1.0 = 0dBFS) 6 | compress_ratio: 1 # Ratio 1 means no compression 7 | norm_frame_len: 10 # Lowest default value used 8 | norm_gauss_size: 3 # Lowest default value used (must be odd) 9 | deesser_freq: 3000 # Lowest allowed frequency 
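The voice profiles above are plain YAML dictionaries of audio-processing parameters (the project's actual processing code lives under `functions/tts/` and is not shown in this dump). As a rough sketch of how such a profile could be loaded and its two simplest settings applied — assuming `pydub`, which `requirements_host.txt` already lists; the function below is illustrative, not the project's implementation:

```python
import math
import yaml
from pydub import AudioSegment  # pydub is listed in requirements_host.txt

def apply_voice_profile(audio_path, profile_path):
    """Illustrative sketch: load a voice YAML and apply its gain/trim settings."""
    with open(profile_path, "r", encoding="utf-8") as f:
        profile = yaml.safe_load(f)

    audio = AudioSegment.from_file(audio_path)

    # gain_factor is a linear multiplier; pydub applies gain in dB.
    gain_factor = float(profile.get("gain_factor", 1.0))
    if gain_factor > 0 and gain_factor != 1.0:
        audio = audio.apply_gain(20 * math.log10(gain_factor))

    # trim_end_ms shaves a short tail off the end of the segment.
    trim_end_ms = int(profile.get("trim_end_ms", 0))
    if 0 < trim_end_ms < len(audio):  # len() of an AudioSegment is in milliseconds
        audio = audio[:-trim_end_ms]

    return audio
```

The remaining keys (`nr_level`, `compress_thresh`, `compress_ratio`, `norm_frame_len`, `norm_gauss_size`, `deesser_freq`) would drive noise reduction, compression, normalization, and de-essing stages that this sketch does not attempt to reproduce.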
-------------------------------------------------------------------------------- /settings/llm_settings/example_ai_models.yml: -------------------------------------------------------------------------------- 1 | # Model configurations for the AI Podcast Generator 2 | # Define different models and their parameters for /chat/completions API calls. 3 | 4 | # Configuration for the default model accessed via the current endpoint 5 | default_model: 6 | api_endpoint: "" 7 | api_key: "sk1-example" 8 | model: "QwQ-32B_Example" # Example: Replace with the actual default model if known 9 | temperature: 0.7 10 | 11 | # Configuration for Gemini 2.0 Flash (Experimental) 12 | gemini_flash: 13 | api_endpoint: "https://generativelanguage.googleapis.com/v1beta/openai/" 14 | api_key: "" 15 | model: "gemini-2.5-flash-preview-05-20" 16 | max_tokens: 65536 17 | # top_p: 0.95 # Optional parameter -------------------------------------------------------------------------------- /settings/env.example: -------------------------------------------------------------------------------- 1 | # .env file for AI Podcast Generator 2 | 3 | # --- Model Selection --- 4 | # Specifies the default model configuration to use from ai_models.yml 5 | # Options: default_model, gemini_flash (or others defined in ai_models.yml) 6 | DEFAULT_MODEL_CONFIG="default_model" # Change this to 'gemini_flash' to use Gemini by default 7 | 8 | # --- Search APIs --- 9 | # Google Custom Search API Credentials 10 | # 1. Get API Key from Google Cloud Console (Credentials page) 11 | GOOGLE_API_KEY="" 12 | # 2. Get Search Engine ID (cx) from Programmable Search Engine control panel (make sure "Search entire web" is ON) 13 | GOOGLE_CSE_ID="" 14 | 15 | # Brave Search API Key (Get from https://api.search.brave.com/) 16 | BRAVE_API_KEY="" 17 | -------------------------------------------------------------------------------- /run_control_panel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Script to activate virtual environment and run control panel app 4 | # Navigate to the script's directory 5 | cd "$(dirname "$0")" 6 | 7 | # Check if virtual environment exists 8 | if [ ! -d "host_venv" ]; then 9 | echo "Error: Virtual environment 'host_venv' not found in current directory." 10 | echo "Please ensure you're running this script from the project root directory." 11 | exit 1 12 | fi 13 | 14 | # Check if control_panel_app.py exists 15 | if [ ! -f "control_panel_app.py" ]; then 16 | echo "Error: control_panel_app.py not found in current directory." 17 | echo "Please ensure you're running this script from the project root directory." 18 | exit 1 19 | fi 20 | 21 | echo "Activating virtual environment..." 22 | source host_venv/bin/activate 23 | 24 | # Check if activation was successful 25 | if [ $? -ne 0 ]; then 26 | echo "Error: Failed to activate virtual environment." 27 | exit 1 28 | fi 29 | 30 | echo "Virtual environment activated successfully." 31 | echo "Starting Control Panel App..." 32 | echo "================================================================" 33 | 34 | # Function to open URL in default browser 35 | open_url() { 36 | local url="http://127.0.0.1:5000" 37 | echo "Attempting to open $url in your default browser..." 
38 | case "$(uname -s)" in 39 | Linux*) xdg-open "$url" >/dev/null 2>&1 & ;; 40 | Darwin*) open "$url" & ;; 41 | *) echo "Please open your browser and navigate to $url" ;; 42 | esac 43 | } 44 | 45 | # Open the browser in the background after a short delay 46 | (sleep 2 && open_url) & 47 | 48 | # Run the control panel app in the foreground 49 | python control_panel_app.py -------------------------------------------------------------------------------- /templates/history.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Output History - Ecne AI Podcaster Control Panel 7 | 8 | 9 | 10 |
11 | Output History
12 | Browse and manage all your generated scripts, audio, and videos.
13 | 14 | Back to Dashboard
15 | 16 | {% if output_files %}
17 | 18 | {% for file in output_files %}
19 | 20 | {{ file.name }}
21 | Type: {{ file.type | capitalize }}
22 | Path: {{ file.path }}
23 | Size: {{ "%.2f" | format(file.size / 1024 / 1024) }} MB
24 | Last Modified: {{ file.modified }}
25 | Download
26 | {% if file.type == 'video' %} 27 | View/Play Video
28 | {% elif file.type == 'script' %} 29 | View Script
30 | {% endif %} 31 |
32 | {% endfor %} 33 |
34 | {% else %}
35 | No generated output files found yet.
36 | {% endif %} 37 |
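For context, the `output_files` variable iterated in this template is a list of dictionaries exposing `name`, `type`, `path`, `size` (in bytes), and `modified` fields. The actual view lives in `control_panel_app.py`, which is not included in this dump; a minimal sketch of the kind of Flask route that could feed the template (the route name, output directory, and file-type mapping are assumptions) would look like:

```python
import os
import datetime
from flask import Flask, render_template

app = Flask(__name__)

@app.route("/history")
def history():
    """Illustrative sketch of a view that populates history.html."""
    output_files = []
    outputs_dir = "outputs"  # assumed location of generated artifacts
    for root, _dirs, files in os.walk(outputs_dir):
        for name in files:
            path = os.path.join(root, name)
            ext = os.path.splitext(name)[1].lower()
            file_type = "video" if ext == ".mp4" else "script" if ext in (".txt", ".md") else "other"
            info = os.stat(path)
            output_files.append({
                "name": name,
                "type": file_type,
                "path": path,
                "size": info.st_size,  # bytes; the template converts this to MB
                "modified": datetime.datetime.fromtimestamp(info.st_mtime).strftime("%Y-%m-%d %H:%M:%S"),
            })
    return render_template("history.html", output_files=output_files)
```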
38 | 39 | -------------------------------------------------------------------------------- /settings/characters/host.yml: -------------------------------------------------------------------------------- 1 | # Character Profile: Host 2 | name: "Eric Dundell" 3 | podcast_name: "Dundell's Cyberspace" 4 | profession: "Podcast Host / Tech Enthusiast" 5 | background: "Former tech journalist, always curious about the 'why' behind technology. Passionate about making complex topics accessible." 6 | education: "B.A. in Communications" 7 | personality_traits: 8 | - Relaxed and conversational 9 | - Deeply inquisitive - always asks follow-up questions 10 | - Empathetic listener who breaks down complex topics 11 | - Guides guests to explain terms and concepts 12 | - Makes topics accessible through analogies 13 | - Occasionally injects light humor 14 | speaking_style: 15 | - Always starts topics with "explain like I'm new to this" 16 | - Uses multiple follow-up questions per topic 17 | - Asks for real-world examples and implications 18 | - Ensures terms are explained for listeners 19 | - Explores each topic thoroughly before moving on 20 | - Creates smooth transitions between topics 21 | interaction_patterns: 22 | - Starts each topic with a basic understanding question 23 | - Follows up on technical terms mentioned by guest 24 | - Asks for clarification on complex concepts 25 | - Connects topics to previous discussions 26 | - Summarizes before changing topics 27 | example_phrases: 28 | - "For our listeners who are new to this, can you explain what [term] means?" 29 | - "That's fascinating! Let's break that down a bit. First, what exactly is...?" 30 | - "You mentioned [term]. Could you explain that in simpler terms?" 31 | - "So, if I'm understanding correctly, you're saying that..." 32 | - "That's quite technical. How would this affect everyday users?" 33 | - "Before we move on, could you give us a real-world example?" 34 | - "Coming back to what you said about [previous point]..." 35 | - "Let's explore that aspect a bit more. How does it relate to...?" -------------------------------------------------------------------------------- /settings/characters/guest.yml: -------------------------------------------------------------------------------- 1 | # Character Profile: Expert Guest 2 | name: "Dr. Evelyn Reed" # Can be overridden based on topic if needed 3 | podcast_name: "Dundell's Cyberspace" 4 | profession: "Variable - AI Researcher / Ethicist / Specific Field Expert" 5 | background: "Deeply knowledgeable in their field (determined by the podcast topic). Often involved in research or practical application. Enjoys sharing knowledge." 6 | education: "Ph.D. 
in relevant field (e.g., Computer Science, Philosophy, Biology)" 7 | personality_traits: 8 | - Deep expert knowledge with teaching mindset 9 | - Welcomes and anticipates follow-up questions 10 | - Breaks complex topics into digestible parts 11 | - Connects different concepts naturally 12 | - Patient with repeated clarification requests 13 | - Enthusiastic about sharing knowledge 14 | speaking_style: 15 | - Starts with high-level overview before details 16 | - Defines technical terms as they're introduced 17 | - Uses analogies for complex concepts 18 | - Builds answers in clear, logical steps 19 | - References previous points to show connections 20 | - Maintains engaging, conversational tone 21 | response_patterns: 22 | - Gives brief overview before detailed explanation 23 | - Anticipates and defines technical terms 24 | - Provides examples after explaining concepts 25 | - Welcomes interruptions for clarification 26 | - Links back to earlier discussions 27 | - Acknowledges host's analogies and builds on them 28 | example_phrases: 29 | - "Let me start with a simple overview..." 30 | - "In basic terms, [concept] means... More specifically..." 31 | - "When I say [technical term], I mean..." 32 | - "Think of it like... [analogy]" 33 | - "This connects to what we discussed earlier about..." 34 | - "There are three key aspects here. First..." 35 | - "Your analogy is spot-on, and to build on that..." 36 | - "Let me break this down step by step..." 37 | - "The practical impact of this is..." -------------------------------------------------------------------------------- /Installer_Windows.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal enabledelayedexpansion 3 | 4 | :: Get the directory where this batch file is located 5 | set "BATCH_DIR=%~dp0" 6 | 7 | :: Change to the batch file's directory to ensure we're in the right location 8 | cd /d "%BATCH_DIR%" 9 | 10 | :: Check for Administrator privileges 11 | >nul 2>&1 "%SYSTEMROOT%\system32\cacls.exe" "%SYSTEMROOT%\system32\config\system" 12 | 13 | if '%errorlevel%' NEQ '0' ( 14 | echo. 15 | echo ================================ 16 | echo ADMINISTRATOR REQUIRED 17 | echo ================================ 18 | echo. 19 | echo This installer must be run with Administrator privileges. 20 | echo. 21 | echo Right-click on this batch file and select "Run as administrator" 22 | echo. 23 | echo Press any key to close... 24 | pause >nul 25 | exit /b 1 26 | ) 27 | 28 | :: If we reach here, we have admin privileges 29 | echo Running Orpheus TTS Windows Installer with Administrator privileges... 30 | echo Current directory: "%CD%" 31 | echo. 32 | 33 | :: Check if PowerShell script exists 34 | if not exist "settings\install\Installer.ps1" ( 35 | echo ERROR: Installer.ps1 not found in settings\install\ directory. 36 | echo Current directory: "%CD%" 37 | echo Batch file directory: "%BATCH_DIR%" 38 | echo Please ensure the installer files are properly organized. 39 | echo. 40 | echo Press any key to close... 41 | pause >nul 42 | exit /b 1 43 | ) 44 | 45 | :: Run the PowerShell installer with execution policy bypass from the correct directory 46 | powershell.exe -ExecutionPolicy Bypass -File ".\settings\install\Installer.ps1" 47 | 48 | :: Check if PowerShell script completed successfully 49 | if '%errorlevel%' NEQ '0' ( 50 | echo. 51 | echo Installation completed with errors. Check the output above for details. 52 | echo. 53 | echo Press any key to close... 
54 | pause >nul 55 | exit /b %errorlevel% 56 | ) 57 | 58 | echo. 59 | echo Installation completed successfully! 60 | echo Press any key to close... 61 | pause >nul 62 | -------------------------------------------------------------------------------- /run_control_panel.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Batch script to activate virtual environment and run control panel app 3 | REM This script can be double-clicked in Windows File Explorer 4 | 5 | echo ================================================================ 6 | echo Ecne AI Podcaster - Control Panel Launcher 7 | echo ================================================================ 8 | echo. 9 | 10 | REM Navigate to the script's directory 11 | cd /d "%~dp0" 12 | 13 | REM Check if virtual environment exists 14 | if not exist "host_venv" ( 15 | echo Error: Virtual environment 'host_venv' not found in current directory. 16 | echo Please ensure you're running this script from the project root directory. 17 | echo Run the Installer.ps1 script first to set up the environment. 18 | echo. 19 | pause 20 | exit /b 1 21 | ) 22 | 23 | REM Check if control_panel_app.py exists 24 | if not exist "control_panel_app.py" ( 25 | echo Error: control_panel_app.py not found in current directory. 26 | echo Please ensure you're running this script from the project root directory. 27 | echo. 28 | pause 29 | exit /b 1 30 | ) 31 | 32 | REM Check if the Python executable exists in the virtual environment 33 | if not exist "host_venv\Scripts\python.exe" ( 34 | echo Error: Python executable not found in virtual environment. 35 | echo Please ensure the virtual environment was created properly. 36 | echo You may need to recreate it by running the Installer.ps1 script. 37 | echo. 38 | pause 39 | exit /b 1 40 | ) 41 | 42 | echo Activating virtual environment... 43 | echo Virtual environment found and ready. 44 | echo. 45 | echo Starting Control Panel App... 46 | echo ================================================================ 47 | echo The Control Panel will open in your default web browser. 48 | echo If it doesn't open automatically, navigate to: http://localhost:5000 49 | echo. 50 | echo IMPORTANT: Keep this window open while using the Control Panel. 51 | echo To stop the server, close this window or press Ctrl+C. 52 | echo ================================================================ 53 | echo. 54 | 55 | REM Run the control panel app using the virtual environment's Python 56 | call "host_venv\Scripts\activate.bat" 57 | python "control_panel_app.py" 58 | 59 | REM If we reach here, the app has stopped 60 | echo. 61 | echo Control Panel App has stopped. 62 | echo You can close this window now. 
63 | pause -------------------------------------------------------------------------------- /functions/scraping/web.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | from newspaper import Article, ArticleException # Using newspaper4k for better web scraping 5 | 6 | from ..utils import log_to_file, USER_AGENTS # Import utilities including USER_AGENTS 7 | 8 | def scrape_website_url(url): 9 | """Scrapes content from a single website URL using newspaper4k.""" 10 | print(f" - Scraping URL (Newspaper4k): {url}") 11 | log_to_file(f"Scraping website URL: {url}") 12 | try: 13 | headers = {'User-Agent': random.choice(USER_AGENTS)} 14 | article = Article(url, request_headers=headers, fetch_images=False) 15 | article.download() 16 | # Handle potential download errors before parsing 17 | if article.download_state != 2: # 2 means success 18 | raise ArticleException(f"Download failed with state {article.download_state}") 19 | article.parse() 20 | 21 | title = article.title 22 | text = article.text 23 | publish_date = article.publish_date 24 | 25 | if text and len(text) > 150: # Basic quality check 26 | content = f"Source URL: {url}\n" 27 | if title: content += f"Title: {title}\n" 28 | if publish_date: content += f"Published: {publish_date.strftime('%Y-%m-%d') if publish_date else 'N/A'}\n" 29 | content += f"\nBody:\n{text}" 30 | print(f" - Success: Scraped content ({len(text)} chars).") 31 | log_to_file(f"Website scrape success: {url} ({len(text)} chars)") 32 | return content.strip() 33 | elif text: 34 | print(" - Warning: Scraped text seems too short, skipping.") 35 | log_to_file(f"Website scrape warning (too short): {url} ({len(text)} chars)") 36 | return None 37 | else: 38 | print(" - Warning: Newspaper4k found no text.") 39 | log_to_file(f"Website scrape warning (no text): {url}") 40 | return None 41 | 42 | except ArticleException as e: # Assuming newspaper4k still uses ArticleException 43 | print(f" - Error (Newspaper4k) scraping {url}: {e}") 44 | log_to_file(f"Website scrape ArticleException: {url} - {e}") 45 | return None 46 | except requests.exceptions.RequestException as e: 47 | print(f" - Error (Request) fetching {url}: {e}") 48 | log_to_file(f"Website scrape RequestException: {url} - {e}") 49 | return None 50 | except Exception as e: 51 | print(f" - Unexpected error scraping {url}: {e}") 52 | log_to_file(f"Website scrape Unexpected Error: {url} - {e}") 53 | return None 54 | finally: 55 | time.sleep(random.uniform(1.5, 3)) # Delay between website scrapes -------------------------------------------------------------------------------- /functions/search/google.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random # For random delays 4 | 5 | from ..utils import log_to_file # Import log_to_file from the utils module 6 | 7 | def search_google_api(query, config, num_results, from_date=None, to_date=None): 8 | """Performs search using Google Custom Search API.""" 9 | urls = [] 10 | api_key = config.get("google_api_key") 11 | cse_id = config.get("google_cse_id") 12 | if not api_key or not cse_id: 13 | log_to_file("Google API search skipped: API Key or CSE ID missing.") 14 | print(" - Google API search skipped: API Key or CSE ID missing.") 15 | return None # Indicate skipped/failed 16 | 17 | search_url = "https://www.googleapis.com/customsearch/v1" 18 | effective_query = query 19 | if from_date: effective_query += f" 
after:{from_date}" 20 | if to_date: effective_query += f" before:{to_date}" 21 | 22 | print(f" - Searching Google API: '{effective_query}' (Num: {num_results})") 23 | log_to_file(f"Google API Search: Query='{effective_query}', Num={num_results}") 24 | 25 | params = {'key': api_key, 'cx': cse_id, 'q': effective_query, 'num': min(num_results, 10)} # Google max 10 per req 26 | 27 | try: 28 | response = requests.get(search_url, params=params, timeout=20) 29 | response.raise_for_status() 30 | search_data = response.json() 31 | 32 | if 'items' in search_data: 33 | urls = [item['link'] for item in search_data['items'] if 'link' in item] 34 | print(f" - Google Found: {len(urls)} results.") 35 | log_to_file(f"Google API Success: Found {len(urls)} URLs.") 36 | else: 37 | print(" - Google Found: 0 results.") 38 | log_to_file("Google API Success: No items found in response.") 39 | 40 | # Check for quota error explicitly 41 | if 'error' in search_data and search_data['error'].get('code') == 429: 42 | print(" - !! Google API Quota limit likely reached !!") 43 | log_to_file("Google API Error: Quota limit reached (429 in response body).") 44 | return 'quota_error' 45 | return urls 46 | 47 | except requests.exceptions.HTTPError as e: 48 | print(f" - Error calling Google API: {e}") 49 | log_to_file(f"Google API HTTP Error: {e}") 50 | if e.response.status_code == 429: 51 | print(" - !! Google API Quota limit likely reached (HTTP 429) !!") 52 | log_to_file("Google API Error: Quota limit reached (HTTP 429).") 53 | return 'quota_error' 54 | return None # General HTTP error 55 | except requests.exceptions.RequestException as e: 56 | print(f" - Error calling Google API: {e}") 57 | log_to_file(f"Google API Request Error: {e}") 58 | return None 59 | except Exception as e: 60 | print(f" - Unexpected error during Google API search: {e}") 61 | log_to_file(f"Google API Unexpected Error: {e}") 62 | return None 63 | finally: 64 | time.sleep(random.uniform(1, 2)) # Delay -------------------------------------------------------------------------------- /functions/search/brave.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random # For random delays 4 | import urllib.parse 5 | import datetime 6 | 7 | from ..utils import log_to_file # Import log_to_file from the utils module 8 | 9 | def search_brave_api(query, config, num_results, from_date=None, to_date=None): 10 | """Performs search using Brave Search API.""" 11 | urls = [] 12 | api_key = config.get("brave_api_key") 13 | if not api_key: 14 | log_to_file("Brave API search skipped: API Key missing.") 15 | print(" - Brave API search skipped: API Key missing.") 16 | return None 17 | 18 | search_url = "https://api.search.brave.com/res/v1/web/search" 19 | headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": api_key} 20 | effective_query = query 21 | freshness_param = None 22 | 23 | # Brave uses 'freshness=pd:YYYYMMDD,YYYYMMDD' 24 | if from_date: 25 | try: 26 | from_dt = datetime.datetime.strptime(from_date, '%Y-%m-%d') 27 | freshness_start = from_dt.strftime('%Y%m%d') 28 | freshness_end = "" 29 | if to_date: 30 | to_dt = datetime.datetime.strptime(to_date, '%Y-%m-%d') 31 | freshness_end = to_dt.strftime('%Y%m%d') 32 | freshness_param = f"pd:{freshness_start},{freshness_end}" 33 | except ValueError: 34 | print(f" - Warning: Invalid date format for Brave freshness '{from_date}' or '{to_date}'. 
Skipping date filter.") 35 | log_to_file(f"Brave API Warning: Invalid date format '{from_date}'/'{to_date}' for freshness.") 36 | 37 | print(f" - Searching Brave API: '{effective_query}' (Num: {num_results})") 38 | log_to_file(f"Brave API Search: Query='{effective_query}', Num={num_results}, Freshness='{freshness_param}'") 39 | 40 | params = {'q': effective_query, 'count': num_results} 41 | if freshness_param: params['freshness'] = freshness_param 42 | 43 | try: 44 | response = requests.get(search_url, headers=headers, params=params, timeout=20) 45 | response.raise_for_status() 46 | search_data = response.json() 47 | 48 | if 'web' in search_data and 'results' in search_data['web']: 49 | urls = [item['url'] for item in search_data['web']['results'] if 'url' in item] 50 | print(f" - Brave Found: {len(urls)} results.") 51 | log_to_file(f"Brave API Success: Found {len(urls)} URLs.") 52 | else: 53 | print(" - Brave Found: 0 results.") 54 | log_to_file(f"Brave API Success: No web/results found in response. Structure: {search_data.keys()}") 55 | return urls 56 | 57 | except requests.exceptions.HTTPError as e: 58 | print(f" - Error calling Brave API: {e}") 59 | log_to_file(f"Brave API HTTP Error: {e}") 60 | if e.response.status_code == 429: 61 | print(" - !! Brave API Quota limit likely reached (HTTP 429) !!") 62 | log_to_file("Brave API Error: Quota limit reached (HTTP 429).") 63 | return 'quota_error' 64 | return None 65 | except requests.exceptions.RequestException as e: 66 | print(f" - Error calling Brave API: {e}") 67 | log_to_file(f"Brave API Request Error: {e}") 68 | return None 69 | except Exception as e: 70 | print(f" - Unexpected error during Brave API search: {e}") 71 | log_to_file(f"Brave API Unexpected Error: {e}") 72 | return None 73 | finally: 74 | time.sleep(random.uniform(1, 2)) # Delay -------------------------------------------------------------------------------- /functions/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import datetime 3 | import re 4 | import random # Required for USER_AGENTS 5 | 6 | # Global variables to hold the current run's archive directory and log file handler 7 | run_archive_dir = None 8 | log_file_path = None 9 | log_file_handler = None 10 | 11 | def set_run_archive_dir(path): 12 | """Sets the global run_archive_dir and initializes the log file path.""" 13 | global run_archive_dir, log_file_path, log_file_handler 14 | # Close any existing log file handler before changing paths 15 | if log_file_handler: 16 | log_file_handler.close() 17 | log_file_handler = None 18 | 19 | run_archive_dir = path 20 | if run_archive_dir: 21 | log_file_path = os.path.join(run_archive_dir, f"ai_podcast_run_{datetime.datetime.now().strftime('%Y%m%d')}.log") 22 | try: 23 | # Open the file in append mode and keep the handler 24 | log_file_handler = open(log_file_path, 'a', encoding='utf-8') 25 | except IOError as e: 26 | print(f"Fatal: Could not open log file for writing at {log_file_path}: {e}") 27 | log_file_handler = None 28 | else: 29 | log_file_path = None 30 | 31 | # User agents for requests/scraping 32 | USER_AGENTS = [ 33 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 34 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15', 35 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0', 36 | ] 37 | 38 | def log_to_file(message): 39 | """Appends 
a message to the log file using the global file handler.""" 40 | if not log_file_handler: 41 | print(f"Warning: Log file handler not available. Could not log: {message}") 42 | return 43 | 44 | try: 45 | timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 46 | log_file_handler.write(f"[{timestamp}] {message}\n") 47 | log_file_handler.flush() # Ensure it's written to disk immediately 48 | except Exception as e: 49 | print(f"Warning: Could not write to log file {log_file_path}: {e}") 50 | 51 | def close_log_file(): 52 | """Closes the global log file handler.""" 53 | global log_file_handler 54 | if log_file_handler: 55 | try: 56 | log_file_handler.close() 57 | log_file_handler = None 58 | print("Log file closed.") 59 | except Exception as e: 60 | print(f"Warning: Error closing log file: {e}") 61 | 62 | def clean_thinking_tags(text): 63 | """Recursively remove all content within <think>...</think> tags.""" 64 | if text is None: return "" 65 | prev_text = "" 66 | current_text = str(text) # Ensure it's a string 67 | # Keep cleaning until no more changes are made (handles nested tags) 68 | while prev_text != current_text: 69 | prev_text = current_text 70 | current_text = re.sub(r'<think>.*?</think>', '', prev_text, flags=re.IGNORECASE | re.DOTALL) 71 | return current_text.strip() 72 | 73 | def parse_ai_tool_response(response_text, tool_tag): 74 | """ 75 | Parses content within the *last* occurrence of specific <tool_tag>...</tool_tag> markers 76 | after cleaning thinking tags. 77 | """ 78 | cleaned_text = clean_thinking_tags(response_text) 79 | if not cleaned_text: return "" 80 | 81 | # Find the last opening tag (case-insensitive) 82 | open_tag = f'<{tool_tag}>' 83 | close_tag = f'</{tool_tag}>' 84 | last_open_tag_index = cleaned_text.lower().rfind(open_tag.lower()) # Case-insensitive find 85 | 86 | if last_open_tag_index != -1: 87 | # Find the first closing tag *after* the last opening tag (case-insensitive) 88 | # Search starting from the position after the last open tag 89 | search_start_index = last_open_tag_index + len(open_tag) 90 | first_close_tag_index_after_last_open = cleaned_text.lower().find(close_tag.lower(), search_start_index) # Case-insensitive find 91 | 92 | if first_close_tag_index_after_last_open != -1: 93 | # Extract content between the tags 94 | start_content_index = last_open_tag_index + len(open_tag) 95 | content = cleaned_text[start_content_index:first_close_tag_index_after_last_open] 96 | return content.strip() 97 | else: 98 | # Found opening tag but no corresponding closing tag afterwards 99 | log_msg = f"Warning: Found last '<{tool_tag}>' but no subsequent '</{tool_tag}>'. Returning full cleaned response." 100 | print(f"\n{log_msg}") 101 | log_to_file(f"{log_msg}\nResponse was:\n{cleaned_text}") 102 | return cleaned_text # Fallback 103 | else: 104 | # No opening tag found at all 105 | log_msg = f"Warning: Tool tag '<{tool_tag}>' not found in AI response. Returning full cleaned response." 106 | print(f"\n{log_msg}") 107 | log_to_file(f"{log_msg}\nResponse was:\n{cleaned_text}") 108 | return cleaned_text # Fallback -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Update Summary for 7/18/2025: 2 | 3 | * **Podcast Builder Enhancements:** 4 | * Fixed padding issues in GUI, ensuring consistent spacing between speakers (750ms) and same-speaker segments (100ms). 5 | * Added "Save and Close" for progress saving and a resume feature for editing completed podcasts.
6 | * Resolved "Missing Audio" errors, enabling segment regeneration for corrupt audio. 7 | * **Audio Quality & Trimming:** 8 | * Addressed some of the split-second audio glitches at segment ends, exploring increased trimming (10-150ms) and new viewing tools. 9 | * **Script Builder Improvements:** 10 | * Defaulted YouTube description building and streamlined settings into a dropdown menu. 11 | * Introduced "Easy mode" for automated script topic, keywords, and guidance. 12 | * **Project Organization:** 13 | * Reworked output folders for better tracking of scripts, archived, and finalized podcast videos. 14 | * **Future Work:** 15 | * Simplifying installation scripts (no sudo, transparent `installation_readme`). 16 | * Creating documentation for podcast customization (characters, images, music, voices). 17 | * Researching new TTS services with Docker FastAPI (e.g., Chatterbox) with low VRAM requirements (max 6GB). 18 | 19 | # Ecne AI Podcaster 20 | 21 | Automated AI podcast generation from topic/keywords to final video. Leverages web research, LLMs for scripting, and TTS for audio synthesis. 22 | 23 | ![image](https://github.com/user-attachments/assets/ca081333-1955-4419-a09c-8ec79a11ad38) 24 | 25 | 26 |
27 | [screenshot: Screenshot_20250526_230535] [screenshot: Screenshot_20250526_230602] 28 |
29 | 30 | ![ecneAI_Podcast](https://github.com/user-attachments/assets/8ee380bd-aea0-45f1-8651-40784778b7ee) 31 | 32 | ## ✨ Features 33 | 34 | - **Web Control Panel:** Easy-to-use browser interface for the complete podcast creation workflow 35 | - **Script Generation:** AI-powered research and script writing from topics, keywords, or documents 36 | - **Podcast Production:** High-quality TTS with Orpheus and video assembly 37 | - **Docker Integration:** Automated TTS backend setup via Docker 38 | - **Multi-Voice Support:** Distinct host and guest voices with audio processing 39 | 40 | --- 41 | 42 | ## 🚀 Quick Start 43 | 44 | ### 1. Installation 45 | Note: Installer.sh made for Linux OS's (Arch Linux tested). Windows installer Pending. 46 | ```bash 47 | git clone https://github.com/ETomberg391/Ecne-AI-Podcaster 48 | cd Ecne-AI-Podcaster 49 | chmod +x Installer.sh 50 | ./Installer.sh 51 | ``` 52 | 53 | ### 2. Start the WebGUI Control Panel 54 | ```bash 55 | ./run_control_panel.sh 56 | ``` 57 | 58 | ### 3. Access the Web Interface 59 | Open your browser and go to: **http://localhost:5000** 60 | 61 | --- 62 | 63 | ## 🎛️ Control Panel Features 64 | 65 | The web control panel provides everything you need: 66 | 67 | ### **Dashboard** 68 | - Quick overview and navigation 69 | - System status monitoring 70 | 71 | ### **Script Builder** 72 | - Topic and keyword input 73 | - Document upload support (PDF, DOCX, TXT) 74 | - Web search integration (Google/Brave APIs) 75 | - AI model selection 76 | - Real-time progress streaming 77 | 78 | ### **Podcast Builder** 79 | - Script selection from generated scripts 80 | - Voice configuration (host/guest) 81 | - Audio and video settings 82 | - Development mode for segment review 83 | 84 | ### **Settings** 85 | - API key management (OpenAI, Google, Brave, etc.) 86 | - LLM model configuration 87 | - Voice profiles and audio processing 88 | 89 | ### **History** 90 | - Browse generated scripts and videos 91 | - Download completed podcasts 92 | - Archive management 93 | 94 | ### **Docker Management** 95 | - Start/stop Orpheus TTS services 96 | - Container status monitoring 97 | - Automated setup 98 | 99 | --- 100 | 101 | ## 📋 Prerequisites 102 | 103 | - Linux OS (Ubuntu/Debian recommended) 104 | - Git, Python 3.8+, Docker, FFmpeg 105 | - NVIDIA GPU with Container Toolkit (recommended for TTS) 106 | 107 | The installer handles most dependencies automatically. 108 | 109 | --- 110 | 111 | ## 🎯 Workflow 112 | 113 | 1. **Configure Settings:** Add your API keys and select LLM models 114 | 2. **Generate Script:** Enter topic/keywords or upload documents 115 | 3. **Create Podcast:** Select script, choose voices, generate video 116 | 4. **Download:** Access your completed podcast from the History page 117 | 118 | --- 119 | 120 | ## 🎬 Examples 121 | 122 | * **Mabinogi Reforging Guide:** 123 | * [![YouTube](https://img.youtube.com/vi/gHvIbpv95iQ/0.jpg)](https://youtu.be/gHvIbpv95iQ?si=yjsy_GlQMz_QKqHH) 124 | * **Dundell's Cyberspace - What are Game Emulators?:** 125 | * [![YouTube](https://img.youtube.com/vi/9pTBPMgRlBU/0.jpg)](https://youtu.be/zbZmEwGinoA?si=hSPlLnpuAsajUtsb) 126 | 127 | --- 128 | 129 | ## 🙏 Credits 130 | 131 | Built with [Orpheus-FastAPI](https://github.com/Lex-au/Orpheus-FastAPI) for TTS and [Orpheus TTS](https://github.com/canopyai/Orpheus-TTS) model. 
132 | 133 | ## 📜 License 134 | 135 | Apache License 2.0 136 | -------------------------------------------------------------------------------- /functions/ai.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import time 4 | import os 5 | import random # Used for retry delay jitter 6 | 7 | from .utils import log_to_file, clean_thinking_tags # Import necessary functions from utils 8 | 9 | def call_ai_api(prompt, config, tool_name="General", timeout=300, retries=1, base_wait_time=60): 10 | """ 11 | Generic function to call the OpenAI-compatible API with retry logic. 12 | - Handles Timeouts and 429 Rate Limit errors with exponential backoff. 13 | """ 14 | print(f"\nSending {tool_name} request to AI...") 15 | log_to_file(f"Initiating API Call (Tool: {tool_name})") 16 | 17 | model_config = config.get("selected_model_config") 18 | if not model_config: 19 | final_model_key = config.get('final_model_key', 'N/A') 20 | print(f"Error: Selected model configuration ('{final_model_key}') not found. Cannot call API.") 21 | log_to_file(f"API Call Error: selected_model_config missing for key '{final_model_key}'.") 22 | return None, None 23 | 24 | api_key = model_config.get("api_key") 25 | api_endpoint = model_config.get("api_endpoint") 26 | if not api_key or not api_endpoint: 27 | final_model_key = config.get('final_model_key', 'N/A') 28 | print(f"Error: 'api_key' or 'api_endpoint' missing in config for '{final_model_key}'.") 29 | log_to_file(f"API Call Error: api_key or api_endpoint missing for model key '{final_model_key}'.") 30 | return None, None 31 | 32 | headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} 33 | payload = { 34 | "model": model_config.get("model"), 35 | "messages": [{"role": "user", "content": prompt}], 36 | } 37 | # Dynamically add optional parameters from config 38 | for param in ["temperature", "max_tokens", "top_p"]: 39 | if param in model_config: 40 | # Ensure correct type, e.g., float for temp, int for tokens 41 | try: 42 | if param == "temperature" or param == "top_p": 43 | payload[param] = float(model_config[param]) 44 | elif param == "max_tokens": 45 | payload[param] = int(model_config[param]) 46 | except (ValueError, TypeError): 47 | print(f"Warning: Could not convert '{param}' to the correct type. 
Using default.") 48 | log_to_file(f"Config Warning: Could not convert '{param}' value '{model_config[param]}'.") 49 | 50 | 51 | if not payload.get("model"): 52 | print(f"Error: 'model' key is missing in the final payload for config '{config.get('DEFAULT_MODEL_CONFIG')}'.") 53 | log_to_file("API Call Error: 'model' key missing in payload.") 54 | return None, None 55 | 56 | log_to_file(f"API Call Details:\nEndpoint: {api_endpoint}\nPayload: {json.dumps(payload, indent=2)}") 57 | full_api_url = api_endpoint.rstrip('/') + "/chat/completions" 58 | 59 | for attempt in range(retries + 1): 60 | try: 61 | response = requests.post(full_api_url, headers=headers, json=payload, timeout=timeout) 62 | response.raise_for_status() 63 | 64 | result = response.json() 65 | log_to_file(f"Raw API Response (Attempt {attempt + 1}):\n{json.dumps(result, indent=2)}") 66 | 67 | if not result.get("choices") or not result["choices"][0].get("message") or not result["choices"][0]["message"].get("content"): 68 | raise ValueError("Invalid response structure received from API.") 69 | 70 | print(f"{tool_name} response received.") 71 | message_content = result["choices"][0]["message"]["content"] 72 | cleaned_message = clean_thinking_tags(message_content) 73 | return message_content, cleaned_message 74 | 75 | except requests.exceptions.Timeout: 76 | error_msg = f"API call timed out after {timeout} seconds (Attempt {attempt + 1}/{retries + 1})." 77 | print(f"\n{tool_name} request failed (Timeout).") 78 | log_to_file(error_msg) 79 | if attempt >= retries: 80 | return None, None # Final attempt failed 81 | 82 | except requests.exceptions.HTTPError as e: 83 | error_msg = f"API call failed with HTTP {e.response.status_code} (Attempt {attempt + 1}/{retries + 1}): {e}" 84 | print(f"\n{tool_name} request failed ({e.response.status_code}).") 85 | log_to_file(error_msg) 86 | if e.response.status_code != 429 or attempt >= retries: 87 | return None, None # Fail on non-429 errors or if retries are exhausted 88 | 89 | except (requests.exceptions.RequestException, ValueError, KeyError, IndexError) as e: 90 | error_msg = f"An error occurred during API call or response parsing (Attempt {attempt + 1}/{retries + 1}): {e}" 91 | print(f"\n{tool_name} request failed.") 92 | log_to_file(f"{error_msg}\nRaw Response (if available):\n{locals().get('response', 'N/A')}") 93 | if attempt >= retries: 94 | return None, None # Final attempt failed 95 | 96 | # If we are going to retry, calculate wait time and log it 97 | if attempt < retries: 98 | wait_time = base_wait_time * (2 ** attempt) + random.uniform(0, 1) # Exponential backoff with jitter 99 | print(f"Waiting for {wait_time:.2f} seconds before retrying...") 100 | log_to_file(f"Retrying after {wait_time:.2f} seconds.") 101 | time.sleep(wait_time) 102 | 103 | return None, None # Should be unreachable, but as a fallback -------------------------------------------------------------------------------- /installation_readme.md: -------------------------------------------------------------------------------- 1 | # Analysis of orpheus_Installer.sh 2 | 3 | This document outlines the components, prerequisites, and setup steps performed or guided by the `orpheus_Installer.sh` script. 4 | 5 | ## 1. System Program Prerequisites (Checks) 6 | 7 | The script checks if the following command-line programs are installed before proceeding. If not found, it typically suggests an installation command (often using `apt`) or provides a link to installation instructions. 
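The installer implements these checks in bash (essentially one `command -v <tool>` test per program). Purely as an illustration of the same logic — this Python snippet is not part of the installer — the checks amount to:

```python
import shutil

# Tools the installer looks for before proceeding (see the list below).
REQUIRED_TOOLS = ["git", "docker", "python3", "pip3", "ffmpeg"]

def missing_prerequisites(tools=REQUIRED_TOOLS):
    """Return the required command-line tools that are not found on PATH."""
    return [tool for tool in tools if shutil.which(tool) is None]

for tool in missing_prerequisites():
    print(f"Missing prerequisite: {tool} - install it with your package manager (e.g. apt).")
```

The individual programs the script checks for, and why each is needed, are listed below.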
8 | 9 | * **`git`**: Required for cloning the necessary GitHub repository. 10 | * **`docker`**: Required for running the core application components in containers. 11 | * **`docker-compose`** (or `docker compose`): Required for orchestrating the Docker containers defined in the compose file. 12 | * **`python3`**: The Python 3 interpreter needed for the host virtual environment setup and potentially running helper scripts. (Checked using `command -v python3`). 13 | * **`pip3`**: The Python package installer for Python 3, used within the host virtual environment. (Checked using `command -v pip3`). 14 | * **`ffmpeg`**: A multimedia framework, likely needed for audio processing by the TTS system or related scripts. 15 | 16 | 17 | ### Optional Automatic Dependency Installation 18 | 19 | * The script attempts to detect your Linux distribution (Debian/Ubuntu, Arch, Fedora/RHEL, openSUSE families, and derivatives like Mint, Pop!_OS, EndeavourOS, etc. are supported). 20 | * If detected, it will list recommended system libraries (like `ffmpeg`, `python3-tk`, etc.) needed for the *host* Python scripts. 21 | * It will then ask if you want to attempt installing these using the system's package manager (`apt`, `pacman`, `dnf`/`yum`) via `sudo`. 22 | * Answering 'y' (Yes) will trigger the installation attempt. Answering 'n' (No) or pressing Enter (default) will skip this step, requiring manual installation if needed. 23 | 24 | ### Optional/Recommended System Libraries (Warnings) 25 | 26 | The script warns that the *host* Python scripts (`mainv3.py`, `orpheus_tts.py`) might require additional system libraries, suggesting installation commands: 27 | 28 | * `python3-tk` (for Tkinter GUI elements) 29 | * `libsndfile1` (for audio file handling) 30 | * `portaudio19-dev` (for audio I/O) 31 | * Selenium WebDriver (requires a browser like Chrome and its corresponding `chromedriver` in the system PATH) - *Note: The script attempts to install these.* 32 | 33 | ### GPU-Specific Checks 34 | 35 | * **`nvidia-smi`**: Used to detect if an NVIDIA GPU is present. 36 | * **`nvidia-container-runtime` / `nvidia-container-toolkit`**: Checks for the necessary toolkit to allow Docker containers to access the NVIDIA GPU. 37 | 38 | ## 2. GitHub Projects 39 | 40 | The script clones or updates the following repository: 41 | 42 | * **Repository:** `https://github.com/Lex-au/Orpheus-FastAPI.git` 43 | * **Destination:** Cloned into the `Orpheus-FastAPI` subdirectory within the user-specified installation directory (default: `orpheus_tts_setup`). 44 | 45 | ## 3. Python Virtual Environment (`venv`) 46 | 47 | The script sets up a dedicated Python virtual environment for host-level scripts: 48 | 49 | * **Type:** Standard Python `venv`. 50 | * **Name:** `host_venv` 51 | * **Location:** Created inside the main installation directory (e.g., `orpheus_tts_setup/host_venv`). 52 | * **Purpose:** To install Python dependencies for `mainv3.py` and `orpheus_tts.py` without interfering with the system's global Python environment. 53 | 54 | ## 4. Pip Packages 55 | 56 | Python packages are installed using `pip3` *within* the `host_venv` virtual environment: 57 | 58 | * **Source:** Packages listed in the `requirements_host.txt` file (expected to be in the directory *parent* to the installation directory). 59 | * **Action:** `pip3 install -r ../requirements_host.txt` is executed within the activated `host_venv`. 
60 | * **Specific Packages:** The exact packages depend on the contents of `requirements_host.txt` (not included in the installer script itself). 61 | * **NLTK Data:** Downloads the 'punkt' tokenizer data (`python3 -m nltk.downloader punkt`) required by the NLTK library (which is presumably listed in `requirements_host.txt`). 62 | * **Pip Upgrade:** Upgrades `pip` itself within the `host_venv`. 63 | 64 | ## 5. Docker Components (User Action Required Post-Script) 65 | 66 | The script itself **does not** build or run Docker containers. It prepares the necessary files and instructs the user on how to start the services using Docker Compose. 67 | 68 | * **Configuration File:** The primary file used is `docker-compose-gpu.yml` located inside the cloned `Orpheus-FastAPI` directory. 69 | * **User Command:** The user is instructed to run `docker compose -f docker-compose-gpu.yml up` (or `docker-compose ...` for V1 syntax). 70 | * **Expected Services (Based on Compose File):** 71 | * A FastAPI web application container (likely built from `Dockerfile.gpu` or `Dockerfile.cpu` in the `Orpheus-FastAPI` repo). 72 | * A `llama.cpp` server container (likely pulled from a registry or built, responsible for running the GGUF model). 73 | * **Model Management:** The script defines the URL and filename for the `Orpheus-3b-FT-Q8_0.gguf` model, but the download is likely handled *within* the Docker environment orchestrated by the compose file, not directly by the installer script. The script notes the model is "managed by Docker Compose". 74 | * **GPU vs CPU:** The script checks for GPU capabilities and defaults to instructing the user to use `docker-compose-gpu.yml`. It warns that a CPU-specific compose file might be needed if no GPU is available or configured correctly with the NVIDIA Container Toolkit. 75 | 76 | ## 6. Conda Environments 77 | 78 | The script does **not** use or create any Conda environments. 79 | -------------------------------------------------------------------------------- /functions/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import json # Although json is not directly used in these functions, it's used in call_ai_api which relies on config. 4 | from dotenv import load_dotenv 5 | from .utils import log_to_file # Import log_to_file from the new utils module 6 | 7 | # Define script directory and LLM directory relative to this new structure 8 | # Assuming the new_script_builder.py will be in new_style/ 9 | # And settings are still relative to the original Ecne-AI-Podcaster directory. 10 | # We need to adjust paths accordingly. 11 | # The original SCRIPT_DIR was os.path.dirname(__file__) from Ecne-AI-Podcaster/script_builder.py 12 | # The new script will be in new_style/, so relative paths need adjustment. 
13 | # Let's assume the settings directory remains relative to the project base 14 | NEW_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) # Directory of this file (e.g., .../Ecne-AI-Podcasterv2/functions) 15 | # Go up one level from the functions directory to reach the project root (e.g., .../Ecne-AI-Podcasterv2) 16 | PROJECT_BASE_DIR = os.path.abspath(os.path.join(NEW_SCRIPT_DIR, '..')) 17 | LLM_DIR = os.path.join(PROJECT_BASE_DIR, "settings/llm_settings") 18 | 19 | 20 | def load_config(): 21 | """Loads configuration from .env file and ai_models.yml.""" 22 | load_dotenv() 23 | config = { 24 | # API endpoint and key are now loaded from ai_models.yml based on selection 25 | "google_api_key": os.getenv("GOOGLE_API_KEY"), 26 | "google_cse_id": os.getenv("GOOGLE_CSE_ID"), 27 | "brave_api_key": os.getenv("BRAVE_API_KEY"), 28 | # Reddit keys are loaded but unused in current scraping logic 29 | "reddit_client_id": os.getenv("REDDIT_CLIENT_ID"), 30 | "reddit_client_secret": os.getenv("REDDIT_CLIENT_SECRET"), 31 | "reddit_user_agent": os.getenv("REDDIT_USER_AGENT"), 32 | } 33 | 34 | # --- Load Model Configurations --- 35 | models_config_path = os.path.join(LLM_DIR, 'ai_models.yml') 36 | try: 37 | with open(models_config_path, 'r', encoding='utf-8') as f: 38 | models_config = yaml.safe_load(f) 39 | if not models_config or not isinstance(models_config, dict): 40 | raise ValueError("ai_models.yml is empty or not a valid dictionary.") 41 | print(f"Loaded model configurations from {models_config_path}") 42 | log_to_file(f"Loaded model configurations from {models_config_path}") 43 | except FileNotFoundError: 44 | print(f"Error: Model configuration file not found at {models_config_path}") 45 | log_to_file(f"Error: Model configuration file not found at {models_config_path}") 46 | # In a modular structure, we might return None or raise a specific error 47 | # For now, maintaining original behavior of exiting 48 | exit(1) 49 | except (yaml.YAMLError, ValueError) as e: 50 | print(f"Error parsing model configuration file {models_config_path}: {e}") 51 | log_to_file(f"Error parsing model configuration file {models_config_path}: {e}") 52 | exit(1) 53 | 54 | # NOTE: Model selection logic moved to main() after args parsing 55 | 56 | # Basic validation 57 | # Check search APIs 58 | google_ok = config.get("google_api_key") and config.get("google_cse_id") 59 | brave_ok = config.get("brave_api_key") 60 | if not google_ok and not brave_ok: 61 | print("Warning: Neither Google (API Key + CSE ID) nor Brave API Key are set. Web search will fail.") 62 | log_to_file("Warning: Neither Google (API Key + CSE ID) nor Brave API Key are set. Web search will fail.") 63 | 64 | # Check Reddit API creds 65 | reddit_ok = all(config.get(k) for k in ["reddit_client_id", "reddit_client_secret", "reddit_user_agent"]) 66 | if not reddit_ok: 67 | print("Warning: Reddit credentials (client_id, client_secret, user_agent) missing in .env. Reddit scraping via PRAW will fail.") 68 | log_to_file("Warning: Reddit credentials (client_id, client_secret, user_agent) missing in .env. 
Reddit scraping via PRAW will fail.") 69 | 70 | print("Configuration loaded.") 71 | log_to_file("Configuration loaded successfully.") 72 | # Return both basic config and the loaded models dictionary 73 | return config, models_config 74 | 75 | def load_character_profile(filepath): 76 | """Loads character profile from a YAML file.""" 77 | try: 78 | print(f"Loading character profile from {filepath}") 79 | log_to_file(f"Attempting to load character profile from {filepath}") 80 | with open(filepath, 'r', encoding='utf-8') as f: 81 | profile = yaml.safe_load(f) 82 | # Ensure podcast name is loaded if present 83 | if 'podcast_name' not in profile: 84 | print(f"Warning: 'podcast_name' not found in profile {filepath}. Using default.") 85 | log_to_file(f"Warning: 'podcast_name' not found in profile {filepath}. Using default.") 86 | profile.setdefault('podcast_name', 'Podcast') # Default if missing 87 | print(f"Loaded character profile from {filepath}") 88 | log_to_file(f"Successfully loaded character profile from {filepath}") 89 | return profile 90 | except FileNotFoundError: 91 | print(f"Error: Character profile file not found at {filepath}") 92 | log_to_file(f"Error: Character profile file not found at {filepath}") 93 | return None 94 | except yaml.YAMLError as e: 95 | print(f"Error parsing YAML file {filepath}: {e}") 96 | log_to_file(f"Error parsing YAML file {filepath}: {e}") 97 | return None 98 | except Exception as e: 99 | print(f"An unexpected error occurred loading {filepath}: {e}") 100 | log_to_file(f"An unexpected error occurred loading {filepath}: {e}") 101 | return None -------------------------------------------------------------------------------- /functions/search/discovery.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | import re 5 | 6 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 7 | from ..utils import log_to_file, parse_ai_tool_response, USER_AGENTS # Import utilities 8 | 9 | def discover_sources(keywords_list, config, args): # Added args parameter 10 | """Uses AI to discover relevant websites and subreddits.""" 11 | print("\nDiscovering sources via AI...") 12 | log_to_file("Starting source discovery phase.") 13 | # Use the first keyword/phrase for simplicity, or combine them 14 | discovery_keyword_str = " | ".join(keywords_list) 15 | print(f"Using keywords for discovery: '{discovery_keyword_str}'") 16 | log_to_file(f"Keywords for discovery: '{discovery_keyword_str}'") 17 | 18 | prompt = ( 19 | f"Based on the keywords '{discovery_keyword_str}', suggest relevant information sources. " 20 | f"Include specific websites (news sites, reputable blogs, official project sites) and relevant subreddits. 
" 21 | f"Prioritize sources known for reliable, detailed information on this topic.\n" 22 | f"Format your response strictly within tags, listing each source URL or subreddit name (e.g., 'r/technology' or 'techcrunch.com') on a new line.\n" 23 | f"Example:\n\ntechcrunch.com\nwired.com\nexampleblog.net/relevant-section\nr/artificial\nr/machinelearning\n" 24 | ) 25 | 26 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name="SourceDiscovery", timeout=args.ai_timeout, retries=args.ai_retries) 27 | 28 | if not cleaned_response: 29 | log_to_file("Error: No response received from AI API for source discovery.") 30 | print("\nError: No response received from AI API for source discovery.") 31 | return [] 32 | 33 | sources_str = parse_ai_tool_response(cleaned_response, "toolWebsites") 34 | 35 | if not sources_str or sources_str == cleaned_response: # Parsing failed or tag missing 36 | log_to_file("Error: Could not parse tag in source discovery response.") 37 | print("\nError: Could not parse tag in source discovery response.") 38 | return [] 39 | 40 | # Remove trailing parenthetical explanations before validation 41 | sources_list_raw = [line.strip() for line in sources_str.split('\n') if line.strip()] 42 | sources_list = [] 43 | for line in sources_list_raw: 44 | # Remove ' (explanation...)' from the end of the line 45 | cleaned_line = re.sub(r'\s*\(.*\)\s*$', '', line).strip() 46 | if cleaned_line: 47 | # Handle domain names without protocol 48 | if '.' in cleaned_line and not cleaned_line.startswith(('http://', 'https://', 'r/')): 49 | cleaned_line = f"https://{cleaned_line}" 50 | # Add if it's a valid URL or reddit source 51 | if cleaned_line.startswith(('http://', 'https://', 'r/')): 52 | sources_list.append(cleaned_line) 53 | 54 | if not sources_list: 55 | log_to_file(f"Warning: No valid sources extracted after parsing.\nParsed content: {sources_str}") 56 | print("\nWarning: No valid sources extracted after parsing.") 57 | return [] 58 | 59 | print(f"Discovered {len(sources_list)} potential sources.") 60 | log_to_file(f"Discovered {len(sources_list)} potential sources.") 61 | 62 | # --- Add Source Validation --- 63 | validated_sources = [] 64 | print("Validating sources...") 65 | log_to_file("Validating discovered sources.") 66 | for source in sources_list: 67 | is_valid = False 68 | print(f" - Checking: {source}...", end="") 69 | try: 70 | if source.startswith('r/'): # Assume subreddit exists if AI suggested 71 | is_valid = True 72 | print(" OK (Subreddit)") 73 | else: # Check website accessibility 74 | # Prepend http:// if no scheme exists 75 | url_to_check = source if source.startswith(('http://', 'https://')) else f'http://{source}' 76 | # Use HEAD request for efficiency 77 | response = requests.head(url_to_check, headers={'User-Agent': random.choice(USER_AGENTS)}, timeout=10, allow_redirects=True) 78 | if response.status_code < 400: # OK or Redirect 79 | is_valid = True 80 | print(f" OK (Status: {response.status_code})") 81 | else: 82 | print(f" Failed (Status: {response.status_code})") 83 | except requests.exceptions.RequestException as e: 84 | print(f" Failed (Error: {e})") 85 | log_to_file(f"Source validation failed for {source}: {e}") 86 | except Exception as e: 87 | print(f" Failed (Unexpected Error: {e})") 88 | log_to_file(f"Source validation failed for {source} (Unexpected): {e}") 89 | 90 | if is_valid: 91 | validated_sources.append(source) 92 | time.sleep(0.5) # Small delay between checks 93 | 94 | print(f"Validated {len(validated_sources)} sources: 
{validated_sources}") 95 | log_to_file(f"Validated {len(validated_sources)} sources: {validated_sources}") 96 | 97 | # --- Filter Reddit sources if --no-reddit is specified --- 98 | if args.no_reddit: 99 | non_reddit_sources = [src for src in validated_sources if not (src.startswith('r/') or 'reddit.com/r/' in src)] 100 | print(f"Filtering Reddit sources due to --no-reddit flag. Using {len(non_reddit_sources)} non-Reddit sources.") 101 | log_to_file(f"Source Discovery: Filtered out Reddit sources. Using {len(non_reddit_sources)} sources: {non_reddit_sources}") 102 | return non_reddit_sources 103 | else: 104 | return validated_sources -------------------------------------------------------------------------------- /functions/tts/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | # Define Languages and Voices (copied from orpheus_tts.py for now, will be removed if centralized) 5 | LANGUAGES_VOICES = { 6 | 'English': ['tara', 'leah', 'jess', 'leo', 'dan', 'mia', 'zac', 'zoe'], 7 | 'French': ['pierre', 'amelie', 'marie'], 8 | 'German': ['jana', 'thomas', 'max'], 9 | 'Korean': ['유나', '준서'], 10 | 'Hindi': ['ऋतिका'], 11 | 'Mandarin': ['长乐', '白芷'], 12 | 'Spanish': ['javi', 'sergio', 'maria'], 13 | 'Italian': ['pietro', 'giulia', 'carlo'] 14 | } 15 | LANGUAGES = list(LANGUAGES_VOICES.keys()) 16 | ALL_VOICES = [voice for lang_voices in LANGUAGES_VOICES.values() for voice in lang_voices] 17 | 18 | def parse_tts_arguments(): 19 | """ 20 | Parses command-line arguments specific to the TTS builder. 21 | """ 22 | parser = argparse.ArgumentParser( 23 | description="Generate speech from text or a script file using Orpheus TTS FastAPI endpoint.", 24 | epilog="Examples:\n" 25 | " Single sentence: python3 tts_builder.py --input \"Hello there.\" --voice leo --output single\n" 26 | " From script: python3 tts_builder.py --script podcast.txt --host-voice leo --guest-voice tara --output podcast_audio.wav --silence 0.5\n" 27 | " Dev Mode: python3 tts_builder.py --script podcast.txt --dev --output dev_test.wav --silence 0.5\n" 28 | " Expanded: python3 tts_builder.py --script podcast_script_small.txt --host-voice leo --guest-voice tara --output simple_test_script --dev --guest-breakup --video-resolution \"1920x1080\" --video-fps 24 --video-intermediate-preset slow --video-intermediate-crf 18 --video-final-audio-bitrate 320k", 29 | formatter_class=argparse.RawDescriptionHelpFormatter 30 | ) 31 | 32 | # --- Input Arguments (Mutually Exclusive) --- 33 | group = parser.add_mutually_exclusive_group(required=True) 34 | group.add_argument('--input', type=str, help='Single text input to synthesize.') 35 | group.add_argument('--script', type=str, help='Path to a script file (.txt) with lines like "Speaker: Dialogue".') 36 | group.add_argument('--resume-from-json', type=str, help='Path to a podcast JSON file to resume editing.') 37 | 38 | # --- Script Specific Arguments --- 39 | parser.add_argument('--host-voice', type=str, default='leo', 40 | help='Voice to use for lines starting with "Host:" (script mode only, default: leo).') 41 | parser.add_argument('--guest-voice', type=str, default='tara', 42 | help='Voice to use for lines starting with "Guest:" (script mode only, default: tara).') 43 | parser.add_argument('--silence', type=float, default=1.0, 44 | help='Duration of silence in seconds between script lines (default: 1.0). 
Use 0 to disable.') 45 | 46 | # --- General Arguments --- 47 | parser.add_argument('--voice', type=str, default='tara', 48 | help='Voice to use for single --input (default: tara).') 49 | parser.add_argument('--speed', type=float, default=1.0, 50 | help='Speech speed factor (0.5 to 1.5, default: 1.0).') 51 | parser.add_argument('--port', type=int, default=5005, 52 | help='Port the Orpheus-FastAPI server is running on (default: 5005).') 53 | parser.add_argument('--api-host', type=str, default='127.0.0.1', 54 | help='Host the Orpheus-FastAPI server is running on (default: 127.0.0.1).') 55 | parser.add_argument('--output', type=str, default='output_speech.wav', 56 | help='Output filename for the generated audio (default: output_speech.wav).') 57 | parser.add_argument('--dev', action='store_true', 58 | help='Enable development mode: launch GUI to review/redo segments before finalizing.') 59 | parser.add_argument('--guest-breakup', action='store_true', 60 | help='Break Guest dialogue into sentences for separate TTS processing.') 61 | parser.add_argument('--tts-max-retries', type=int, default=3, 62 | help='Maximum number of retry attempts for failed TTS requests (default: 3).') 63 | parser.add_argument('--tts-timeout', type=int, default=180, 64 | help='Timeout in seconds for each TTS request (default: 180).') 65 | 66 | # --- Video Generation Arguments (used when --dev is enabled) --- 67 | video_group = parser.add_argument_group('Video Generation Options (--dev mode only)') 68 | video_group.add_argument('--video-resolution', type=str, default="1280x720", 69 | help='Video resolution (e.g., "1920x1080"). Default determined by first background, fallback to this.') 70 | video_group.add_argument('--video-fps', type=int, default=24, 71 | help='Frames per second for the output video.') 72 | video_group.add_argument('--video-character-scale', type=float, default=1.0, 73 | help='Scale factor for character images in the video.') 74 | video_group.add_argument('--video-fade', type=float, default=1.0, 75 | help='Video fade duration for intro/outro segments.') 76 | video_group.add_argument('--video-intermediate-preset', default='medium', 77 | help='Encoding preset for intermediate video segments (e.g., ultrafast, medium, slow).') 78 | video_group.add_argument('--video-intermediate-crf', type=int, default=23, 79 | help='CRF value for intermediate video segments (0-51, lower is better quality).') 80 | video_group.add_argument('--video-final-audio-bitrate', default='192k', 81 | help='Bitrate for final AAC audio encoding (e.g., 128k, 192k).') 82 | video_group.add_argument('--video-workers', type=int, default=None, 83 | help='Number of worker processes for video generation. 
Defaults to CPU count.') 84 | video_group.add_argument('--video-keep-temp', action='store_true', 85 | help='Keep temporary video segment files after completion.') 86 | 87 | return parser.parse_args() 88 | 89 | if __name__ == '__main__': 90 | args = parse_tts_arguments() 91 | print("Parsed Arguments:") 92 | for arg, value in vars(args).items(): 93 | print(f" {arg}: {value}") -------------------------------------------------------------------------------- /functions/search/api.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import random 4 | import datetime 5 | import json 6 | import urllib.parse 7 | 8 | from ..utils import log_to_file, USER_AGENTS # Import utilities from functions.utils 9 | 10 | # --- Search API Functions --- 11 | 12 | def search_google_api(query, config, num_results, from_date=None, to_date=None): 13 | """Performs search using Google Custom Search API.""" 14 | urls = [] 15 | api_key = config.get("google_api_key") 16 | cse_id = config.get("google_cse_id") 17 | if not api_key or not cse_id: 18 | log_to_file("Google API search skipped: API Key or CSE ID missing.") 19 | return None 20 | 21 | search_url = "https://www.googleapis.com/customsearch/v1" 22 | # Add date ranges using Google's `sort=date:r:YYYYMMDD:YYYYMMDD` parameter 23 | date_restrict = "" 24 | if from_date: 25 | try: 26 | from_dt_str = datetime.datetime.strptime(from_date, '%Y-%m-%d').strftime('%Y%m%d') 27 | to_dt_str = datetime.datetime.strptime(to_date, '%Y-%m-%d').strftime('%Y%m%d') if to_date else datetime.datetime.now().strftime('%Y%m%d') 28 | date_restrict = f"date:r:{from_dt_str}:{to_dt_str}" 29 | except ValueError: 30 | print(f" - Warning: Invalid date format for Google search '{from_date}' or '{to_date}'. Ignoring date range.") 31 | log_to_file(f"Google API Warning: Invalid date format '{from_date}'/'{to_date}'. Ignoring date range.") 32 | 33 | print(f" - Searching Google API: '{query}' (Num: {num_results}, Date: '{date_restrict or 'None'}')") 34 | log_to_file(f"Google API Search: Query='{query}', Num={num_results}, DateRestrict='{date_restrict}'") 35 | 36 | params = {'key': api_key, 'cx': cse_id, 'q': query, 'num': min(num_results, 10)} 37 | if date_restrict: 38 | params['sort'] = date_restrict # Add sort parameter for date range 39 | 40 | try: 41 | response = requests.get(search_url, params=params, timeout=20) 42 | response.raise_for_status() 43 | search_data = response.json() 44 | 45 | if 'items' in search_data: 46 | urls = [item['link'] for item in search_data['items'] if 'link' in item] 47 | print(f" - Google Found: {len(urls)} results.") 48 | log_to_file(f"Google API Success: Found {len(urls)} URLs.") 49 | else: 50 | print(" - Google Found: 0 results.") 51 | log_to_file("Google API Success: No items found in response.") 52 | 53 | if 'error' in search_data and search_data['error'].get('code') == 429: 54 | print(" - !! Google API Quota limit likely reached !!") 55 | log_to_file("Google API Error: Quota limit reached (429 in response body).") 56 | return 'quota_error' 57 | return urls 58 | 59 | except requests.exceptions.HTTPError as e: 60 | print(f" - Error calling Google API: {e}") 61 | log_to_file(f"Google API HTTP Error: {e}") 62 | if e.response.status_code == 429: 63 | print(" - !! 
Google API Quota limit likely reached (HTTP 429) !!") 64 | log_to_file("Google API Error: Quota limit reached (HTTP 429).") 65 | return 'quota_error' 66 | return None 67 | except requests.exceptions.RequestException as e: 68 | print(f" - Error calling Google API: {e}") 69 | log_to_file(f"Google API Request Error: {e}") 70 | return None 71 | except Exception as e: 72 | print(f" - Unexpected error during Google API search: {e}") 73 | log_to_file(f"Google API Unexpected Error: {e}") 74 | return None 75 | finally: 76 | time.sleep(random.uniform(1, 2)) # Delay 77 | 78 | def search_brave_api(query, config, num_results, from_date=None, to_date=None): 79 | """Performs search using Brave Search API.""" 80 | urls = [] 81 | api_key = config.get("brave_api_key") 82 | if not api_key: 83 | log_to_file("Brave API search skipped: API Key missing.") 84 | return None 85 | 86 | search_url = "https://api.search.brave.com/res/v1/web/search" 87 | headers = {"Accept": "application/json", "Accept-Encoding": "gzip", "X-Subscription-Token": api_key} 88 | freshness_param = None 89 | 90 | if from_date: 91 | try: 92 | from_dt = datetime.datetime.strptime(from_date, '%Y-%m-%d') 93 | freshness_start = from_dt.strftime('%Y%m%d') 94 | freshness_end = "" 95 | if to_date: 96 | to_dt = datetime.datetime.strptime(to_date, '%Y-%m-%d') 97 | freshness_end = to_dt.strftime('%Y%m%d') 98 | freshness_param = f"pd:{freshness_start},{freshness_end}" 99 | except ValueError: 100 | print(f" - Warning: Invalid date format for Brave freshness '{from_date}' or '{to_date}'. Skipping date filter.") 101 | log_to_file(f"Brave API Warning: Invalid date format '{from_date}'/'{to_date}' for freshness.") 102 | 103 | print(f" - Searching Brave API: '{query}' (Num: {num_results}, Freshness: '{freshness_param or 'None'}')") 104 | log_to_file(f"Brave API Search: Query='{query}', Num={num_results}, Freshness='{freshness_param}'") 105 | 106 | params = {'q': query, 'count': num_results} 107 | if freshness_param: params['freshness'] = freshness_param 108 | 109 | try: 110 | # Log the exact request details before sending 111 | prepared_request = requests.Request('GET', search_url, headers=headers, params=params).prepare() 112 | log_to_file(f"Brave API Request Details:\n URL: {prepared_request.url}\n Headers: {prepared_request.headers}") 113 | print(f" - Requesting URL: {prepared_request.url}") # Also print URL for easier debugging 114 | 115 | response = requests.get(search_url, headers=headers, params=params, timeout=20) 116 | response.raise_for_status() 117 | search_data = response.json() 118 | log_to_file(f"Brave API Raw Response Body:\n{json.dumps(search_data, indent=2)}") # Log the raw JSON response 119 | 120 | if 'web' in search_data and 'results' in search_data['web']: 121 | urls = [item['url'] for item in search_data['web']['results'] if 'url' in item] 122 | print(f" - Brave Found: {len(urls)} results.") 123 | log_to_file(f"Brave API Success: Found {len(urls)} URLs.") 124 | else: 125 | print(" - Brave Found: 0 results.") 126 | log_to_file(f"Brave API Success: No web/results found in response. Keys: {search_data.keys()}") 127 | return urls 128 | 129 | except requests.exceptions.HTTPError as e: 130 | print(f" - Error calling Brave API: {e}") 131 | log_to_file(f"Brave API HTTP Error: {e}") 132 | if e.response.status_code == 429: 133 | print(" - !! 
Brave API Quota limit likely reached (HTTP 429) !!") 134 | log_to_file("Brave API Error: Quota limit reached (HTTP 429).") 135 | return 'quota_error' 136 | return None 137 | except requests.exceptions.RequestException as e: 138 | print(f" - Error calling Brave API: {e}") 139 | log_to_file(f"Brave API Request Error: {e}") 140 | return None 141 | except Exception as e: 142 | print(f" - Unexpected error during Brave API search: {e}") 143 | log_to_file(f"Brave API Unexpected Error: {e}") 144 | return None 145 | finally: 146 | time.sleep(random.uniform(1, 2)) # Delay -------------------------------------------------------------------------------- /functions/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import yaml 4 | import datetime 5 | import urllib.parse 6 | 7 | # Define LLM_DIR relative to this new structure 8 | # Go up one level from functions, then into Ecne-AI-Podcaster, then settings/llm_settings 9 | NEW_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | ORIGINAL_BASE_DIR = os.path.abspath(os.path.join(NEW_SCRIPT_DIR, '..', '..', 'Ecne-AI-Podcaster')) 11 | LLM_DIR = os.path.join(ORIGINAL_BASE_DIR, "settings/llm_settings") 12 | 13 | 14 | def parse_arguments(): 15 | """Parses command-line arguments.""" 16 | parser = argparse.ArgumentParser(description="Generate an AI podcast episode.") 17 | 18 | # --- Load model keys dynamically for choices --- 19 | available_model_keys = [] 20 | models_config_path = os.path.join(LLM_DIR, 'ai_models.yml') 21 | 22 | try: 23 | with open(models_config_path, 'r', encoding='utf-8') as f: 24 | models_config = yaml.safe_load(f) 25 | if models_config and isinstance(models_config, dict): 26 | available_model_keys = list(models_config.keys()) 27 | else: 28 | print(f"Warning: Could not load valid model keys from {models_config_path}. --llm-model argument might fail.") 29 | except Exception as e: 30 | print(f"Warning: Error loading {models_config_path} for arg parsing: {e}. 
--llm-model argument might fail.") 31 | # --- End model key loading --- 32 | 33 | # --- Define Arguments --- 34 | # Core 35 | # Made keywords not required, will validate later based on --no-search 36 | parser.add_argument("--keywords", type=str, default=None, help="Comma-separated keywords/phrases for searching (required unless --no-search is used).") 37 | parser.add_argument("--topic", type=str, required=True, help="The main topic phrase for the podcast episode.") 38 | # AI Model Selection 39 | parser.add_argument("--llm-model", type=str, default=None, choices=available_model_keys if available_model_keys else None, 40 | help="Specify the LLM configuration key from ai_models.yml to use (overrides .env setting).") 41 | # Search & Scraping 42 | parser.add_argument("--api", choices=['google', 'brave'], default='google', help="Preferred search API ('google' or 'brave').") 43 | parser.add_argument("--from_date", type=str, default=None, help="Start date for search (YYYY-MM-DD).") 44 | parser.add_argument("--to_date", type=str, default=None, help="End date for search (YYYY-MM-DD).") 45 | parser.add_argument("--max-web-results", type=int, default=3, help="Max results per website source domain.") 46 | parser.add_argument("--max-reddit-results", type=int, default=5, help="Max *posts* to scrape per subreddit source.") 47 | parser.add_argument("--max-reddit-comments", type=int, default=5, help="Max *comments* to scrape per Reddit post.") 48 | parser.add_argument("--per-keyword-results", type=int, default=None, help="Web results per keyword (defaults to max-web-results).") 49 | parser.add_argument("--combine-keywords", action="store_true", help="Treat keywords as one search query (legacy).") 50 | # Output & Content 51 | parser.add_argument("--report", action="store_true", help="Generate a written report in addition to the script.") 52 | parser.add_argument("--youtube-description", action="store_true", help="Generate a YouTube description based on the report.") 53 | parser.add_argument("--score-threshold", type=int, default=5, help="Minimum summary score (0-10) to include in script.") 54 | parser.add_argument("--ai-timeout", type=int, default=120, help="Global timeout in seconds for all AI API calls.") 55 | parser.add_argument("--ai-retries", type=int, default=5, help="Global number of retries for all AI API calls.") 56 | parser.add_argument("--guidance", type=str, default=None, help="Additional guidance/instructions string for the LLM prompts.") 57 | parser.add_argument("--direct-articles", type=str, default=None, help="Path to a text file containing a list of article URLs (one per line) to scrape directly.") 58 | parser.add_argument("--no-search", action="store_true", help="Skip AI source discovery and web search APIs. 
Requires --direct-articles to be set.") 59 | # parser.add_argument("--sources", type=str, default=None, help="Comma-separated list of sources to use instead of AI discovery.") 60 | parser.add_argument("--reference-docs", type=str, default=None, help="Comma-separated paths to text files containing reference information.") 61 | parser.add_argument("--reference-docs-summarize", action="store_true", help="Summarize and score reference docs before including them.") 62 | parser.add_argument("--reference-docs-folder", type=str, default=None, help="Path to a folder containing reference documents (txt, pdf, docx).") 63 | parser.add_argument("--no-reddit", action="store_true", help="Exclude Reddit sources from discovery and scraping.") 64 | parser.add_argument("--single-speaker", action="store_true", help="Generate a single-speaker podcast script (Host only, no Guest).") 65 | 66 | args = parser.parse_args() 67 | 68 | # Set default for per_keyword_results 69 | if args.per_keyword_results is None: 70 | args.per_keyword_results = args.max_web_results 71 | 72 | # Process keywords only if provided 73 | search_queries = [] # Initialize default 74 | if args.keywords: 75 | if args.combine_keywords: 76 | raw_keywords = [k.strip() for k in args.keywords.split(',') if k.strip()] 77 | if not raw_keywords: raise ValueError("Please provide at least one keyword if using --keywords.") 78 | search_queries = [" ".join(raw_keywords)] 79 | print("Keywords combined into a single search query.") 80 | else: 81 | search_queries = [k.strip() for k in args.keywords.split(',') if k.strip()] 82 | if not search_queries: raise ValueError("Please provide at least one keyword/phrase if using --keywords.") 83 | print(f"Processing {len(search_queries)} separate search queries.") 84 | elif not args.no_search: # Keywords are required if we ARE doing a search 85 | parser.error("--keywords is required unless --no-search is specified.") 86 | 87 | # Validate dates 88 | def validate_date(date_str): 89 | if date_str is None: return None 90 | try: 91 | datetime.datetime.strptime(date_str, '%Y-%m-%d') 92 | return date_str 93 | except ValueError: 94 | raise ValueError(f"Invalid date format '{date_str}'. Use YYYY-MM-DD.") 95 | 96 | args.from_date = validate_date(args.from_date) 97 | args.to_date = validate_date(args.to_date) 98 | 99 | args.search_queries = search_queries # Store the processed list (or empty list) back into args 100 | print(f"Args: {vars(args)}") 101 | print(f"Parsed Args: {vars(args)}") # Keep print statement 102 | 103 | # Validation: --no-search requires --direct-articles OR reference docs/folder 104 | # Modified this validation slightly: If --no-search is used, *some* form of input context is needed. 105 | if args.no_search and not args.direct_articles and not args.reference_docs and not args.reference_docs_folder: 106 | parser.error("--no-search requires at least one of --direct-articles, --reference-docs, or --reference-docs-folder to be specified.") 107 | 108 | # Validation: Keywords are required if search is active 109 | if not args.no_search and not args.keywords: 110 | # This check is now done during keyword processing above, but double-checking here is safe. 111 | # parser.error("--keywords is required unless --no-search is specified.") 112 | # Re-checking the logic, the check during processing is sufficient. Removing redundant check here. 
113 |         pass # Validation moved to keyword processing block
114 | 
115 |     return args
116 | 
--------------------------------------------------------------------------------
/templates/settings.html:
--------------------------------------------------------------------------------
[Template text lost during extraction: only the text nodes of settings.html survive. Recoverable structure: a page titled "Settings - AI Podcast Generator" with a "Settings" header and a link back to the dashboard, an "API Keys (.env)" form section with several labeled key fields (presumably the Google, Brave, and Reddit credentials read by functions/config.py) and a save control, and an "LLM Settings (ai_models.yml)" section with a model-selection dropdown plus inline JavaScript (original template lines ~60-134) for loading and saving the values.]
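The settings page above edits the same .env keys and ai_models.yml file that functions/config.py consumes. A minimal sketch of how the configuration pieces shown earlier in this dump might be wired together follows; this is hypothetical wiring, not the project's actual entry point, and the selected-model merge step and package-style imports are assumptions.

from functions.args import parse_arguments
from functions.config import load_config, load_character_profile

# Hypothetical wiring, assuming the functions/ directory is importable as a package.
args = parse_arguments()                # parses --topic, --llm-model, etc. from the CLI
config, models_config = load_config()   # .env values plus the ai_models.yml dictionary

# Pick the model configuration chosen via --llm-model, or fall back to the first key.
selected_key = args.llm_model or next(iter(models_config))
config["selected_model"] = models_config[selected_key]   # assumed merge step, not shown in this excerpt

host_profile = load_character_profile("settings/characters/host.yml")
if host_profile:
    print(f"Model config '{selected_key}' for podcast '{host_profile['podcast_name']}'")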
139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /functions/tts/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import numpy as np 4 | import soundfile as sf 5 | import tempfile 6 | import shutil 7 | from scipy.signal import resample # For resampling in concatenate_wavs 8 | 9 | # Override print function to force immediate flushing for real-time output 10 | original_print = print 11 | def print(*args, **kwargs): 12 | kwargs.setdefault('flush', True) 13 | return original_print(*args, **kwargs) 14 | 15 | # Define VOICE_DIR relative to the project root, assuming functions/tts/utils.py is in functions/tts/ 16 | SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | PROJECT_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, '..', '..')) # Go up two levels from functions/tts/ 18 | VOICE_DIR = os.path.abspath(os.path.join(PROJECT_ROOT, "settings/voices")) 19 | os.makedirs(VOICE_DIR, exist_ok=True) # Ensure VOICE_DIR exists 20 | 21 | def load_voice_config(voice_name): 22 | """Loads voice configuration from YAML file, falling back to default.""" 23 | base_path = os.path.join(VOICE_DIR, f"{voice_name}.yaml") 24 | default_path = os.path.join(VOICE_DIR, "default.yaml") 25 | config_path = base_path if os.path.exists(base_path) else default_path 26 | 27 | # Hardcoded fallback defaults in case even default.yaml is missing/invalid 28 | hardcoded_defaults = { 29 | 'gain_factor': 1.0, 'trim_end_ms': 0, 'nr_level': 0, 30 | 'compress_thresh': 1.0, 'compress_ratio': 1, 'norm_frame_len': 10, 31 | 'norm_gauss_size': 3, 'deesser_freq': 3000 32 | } 33 | 34 | if not os.path.exists(config_path): 35 | print(f"!! Warning: Voice config not found for '{voice_name}' and default.yaml missing. Using hardcoded defaults.") 36 | return hardcoded_defaults 37 | 38 | try: 39 | with open(config_path, 'r') as f: 40 | config = yaml.safe_load(f) 41 | if config is None: # Handle empty YAML file 42 | print(f"!! Warning: Voice config file '{os.path.basename(config_path)}' is empty. Using hardcoded defaults.") 43 | return hardcoded_defaults 44 | print(f"-> Loaded voice config from: {os.path.basename(config_path)}") 45 | # Merge with hardcoded defaults to ensure all keys exist 46 | final_config = hardcoded_defaults.copy() 47 | final_config.update(config) 48 | return final_config 49 | except yaml.YAMLError as e: 50 | print(f"!! Error parsing voice config file '{os.path.basename(config_path)}': {e}. Using hardcoded defaults.") 51 | return hardcoded_defaults 52 | except Exception as e: 53 | print(f"!! Error loading voice config file '{os.path.basename(config_path)}': {e}. Using hardcoded defaults.") 54 | return hardcoded_defaults 55 | 56 | def generate_silence(duration_s, samplerate, temp_dir): 57 | """Generates a silence WAV file and returns its path.""" 58 | if not samplerate: 59 | print("!! Error: Samplerate required to generate silence. 
Skipping.") 60 | return None 61 | print(f"\nGenerating {duration_s}s silence segment (SR: {samplerate} Hz)...") 62 | num_samples = int(duration_s * samplerate) 63 | silence_data = np.zeros(num_samples, dtype=np.float32) 64 | 65 | # Use mkstemp for unique file name, ensuring temp_dir is used 66 | temp_fd, temp_path = tempfile.mkstemp(suffix="_silence.wav", prefix="silence_", dir=temp_dir) 67 | os.close(temp_fd) 68 | try: 69 | sf.write(temp_path, silence_data, samplerate, subtype='PCM_16') 70 | print(f"-> Silence saved to {os.path.basename(temp_path)}") 71 | return temp_path 72 | except Exception as e: 73 | print(f"!! Error generating silence file: {e}") 74 | if os.path.exists(temp_path): os.remove(temp_path) 75 | return None 76 | 77 | def concatenate_wavs(file_list, output_filename, target_samplerate): 78 | """Concatenates a list of WAV files into a single output file.""" 79 | if not file_list: 80 | print("!! Error: No segment files provided for concatenation.") 81 | return False 82 | 83 | valid_files = [f for f in file_list if f and os.path.exists(f)] 84 | if not valid_files: 85 | print("!! Error: No valid files found in the list for concatenation.") 86 | return False 87 | 88 | if not target_samplerate: 89 | print("!! Warning: Target samplerate not provided for concatenation.") 90 | # Attempt to get samplerate from the first valid file 91 | try: 92 | info = sf.info(valid_files[0]) 93 | target_samplerate = info.samplerate 94 | print(f"!! Using samplerate from first file ({os.path.basename(valid_files[0])}): {target_samplerate} Hz.") 95 | except Exception as e: 96 | print(f"!! Error: Could not determine target samplerate from first file: {e}") 97 | return False # Cannot proceed without a samplerate 98 | 99 | print(f"\nConcatenating {len(valid_files)} valid segments into {output_filename} (Target SR: {target_samplerate} Hz)...") 100 | output_data = [] 101 | target_channels = 1 # Assume mono 102 | 103 | for i, filepath in enumerate(valid_files): 104 | print(f"-> Processing file {i+1}/{len(valid_files)}: {os.path.basename(filepath)}") 105 | try: 106 | # Check samplerate and resample if needed 107 | info = sf.info(filepath) 108 | data, sr = sf.read(filepath, dtype='float32') 109 | 110 | if info.samplerate != target_samplerate: 111 | print(f"-> Resampling {os.path.basename(filepath)} from {info.samplerate} Hz to {target_samplerate} Hz...") 112 | # Calculate resampling ratio 113 | ratio = target_samplerate / info.samplerate 114 | n_samples = int(len(data) * ratio) 115 | 116 | # Use scipy's resample function for high-quality resampling 117 | data = resample(data, n_samples) 118 | print(f"-> Resampling complete. New length: {len(data)/target_samplerate:.2f}s") 119 | 120 | # Convert to mono if necessary 121 | if info.channels == 2: 122 | print(f"-> Converting {os.path.basename(filepath)} to mono.") 123 | data = np.mean(data, axis=1) 124 | elif info.channels != 1: 125 | print(f"!! Warning: Unexpected channel count ({info.channels}) in {os.path.basename(filepath)}. Attempting to process first channel.") 126 | # Attempt to take the first channel if more than 2? Or skip? Let's try taking first. 127 | if data.ndim > 1: data = data[:, 0] 128 | 129 | 130 | output_data.append(data) 131 | # Duration calculation here might be slightly off if we manipulated channels 132 | # Let's calculate duration based on output samples / target_sr 133 | print(f"-> Appended {os.path.basename(filepath)} ({len(data)/target_samplerate:.2f}s)") 134 | 135 | except Exception as e: 136 | print(f"!! 
Error reading/processing {os.path.basename(filepath)}: {e}") 137 | print("!! Skipping problematic file.") 138 | continue # Skip file on error 139 | 140 | if not output_data: 141 | print("!! No valid audio data to concatenate after processing.") 142 | return False 143 | 144 | print("Concatenating final audio data...") 145 | final_audio = np.concatenate(output_data) 146 | final_duration = len(final_audio) / target_samplerate 147 | print(f"Final audio length: {final_duration:.2f}s") 148 | 149 | try: 150 | print(f"Writing final audio to {output_filename}...") 151 | sf.write(output_filename, final_audio, target_samplerate, subtype='PCM_16') 152 | print(f"\n✅ Concatenated audio saved successfully to '{output_filename}' ({final_duration:.2f}s)") 153 | return True 154 | except Exception as e: 155 | print(f"!! Error writing final concatenated file '{output_filename}': {e}") 156 | return False -------------------------------------------------------------------------------- /functions/processing/summarization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 5 | from ..utils import log_to_file, clean_thinking_tags, parse_ai_tool_response, run_archive_dir # Import utilities including run_archive_dir 6 | 7 | def summarize_content(scraped_texts, reference_docs_content, topic, config, args): 8 | """ 9 | Uses AI to summarize scraped content and optionally reference documents, 10 | assigning a relevance score to each. 11 | """ 12 | content_to_process = [] 13 | # Add scraped texts with a type identifier 14 | for idx, text in enumerate(scraped_texts): 15 | content_to_process.append({"type": "scraped", "content": text, "source_index": idx + 1}) 16 | 17 | # Add reference docs if summarization is requested 18 | if args.reference_docs_summarize and reference_docs_content: 19 | print(f"Including {len(reference_docs_content)} reference documents in summarization.") 20 | log_to_file(f"Including {len(reference_docs_content)} reference documents in summarization.") 21 | for doc in reference_docs_content: 22 | content_to_process.append({"type": "reference", "content": doc["content"], "path": doc["path"]}) 23 | elif reference_docs_content: 24 | print(f"Skipping summarization for {len(reference_docs_content)} reference documents as --reference-docs-summarize is not set.") 25 | log_to_file(f"Skipping summarization for {len(reference_docs_content)} reference documents.") 26 | 27 | 28 | total_pieces = len(content_to_process) 29 | if total_pieces == 0: 30 | print("\nWarning: No content (scraped or reference for summarization) available to summarize.") 31 | log_to_file("Summarization Warning: No content found to process.") 32 | return [] # Return empty list if nothing to do 33 | 34 | print(f"\nSummarizing {total_pieces} content piece(s)...") 35 | log_to_file(f"Starting summarization for {total_pieces} piece(s). 
Topic: {topic}") 36 | summaries_with_scores = [] 37 | successful_summaries = 0 38 | 39 | for i, item in enumerate(content_to_process, 1): 40 | text = item["content"] 41 | item_type = item["type"] 42 | item_source_id = item.get("path", f"Scraped_{item.get('source_index', i)}") # Use path for ref docs, index for scraped 43 | 44 | if len(text) < 100: # Increased minimum length 45 | print(f"\rSkipping summary for short text piece {i}/{total_pieces} ({item_source_id}).", end='', flush=True) 46 | log_to_file(f"Summary {i}/{total_pieces} ({item_source_id}) skipped (too short: {len(text)} chars).") 47 | continue 48 | 49 | # Show progress 50 | print(f"\rSummarizing & Scoring {i}/{total_pieces} ({item_type}) (Completed: {successful_summaries})", end='', flush=True) 51 | 52 | # Limit text size sent to AI if necessary (check API limits) 53 | max_summary_input_chars = 150000 # Example limit, adjust as needed 54 | truncated_text = text[:max_summary_input_chars] 55 | if len(text) > max_summary_input_chars: 56 | log_to_file(f"Warning: Summary {i} ({item_source_id}) input text truncated to {max_summary_input_chars} chars.") 57 | 58 | guidance_text = f"\n**Additional Guidance:** {args.guidance}\n" if args.guidance else "" 59 | prompt = ( 60 | f"Please provide a concise yet comprehensive summary of the following text. Focus on the key information, main arguments, findings, and any specific data points (statistics, percentages, benchmark results, dates, names) relevant to the main topic.\n" 61 | f"**Main Topic:** {topic}{guidance_text}\n" 62 | f"**Text to Summarize:**\n---\n{truncated_text}\n---\n\n" 63 | f"**Instructions:**\n" 64 | f"1. Format your summary *only* within tags.\n" 65 | f"2. After the summary tag, provide a relevance score (integer 0-10) indicating how relevant the *summary* is to the Main Topic ('{topic}') and adheres to any Additional Guidance provided. Enclose the score *only* in tags.\n\n" 66 | f"**Example Response Structure:**\n" 67 | f"This is a concise summary preserving key details like a 95% accuracy rate achieved in 2023 according to Dr. 
Smith.\n" 68 | f"8" 69 | ) 70 | 71 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name=f"Summary_{i}_{item_type}", timeout=args.ai_timeout, retries=args.ai_retries) 72 | 73 | summary = "Error: Summarization Failed" 74 | score = -1 # Default score 75 | summary_details = {"type": item_type, "source_id": item_source_id} # Store type and source id 76 | 77 | if cleaned_response: 78 | parsed_summary = parse_ai_tool_response(cleaned_response, "toolScrapeSummary") 79 | # Check if parsing returned the whole response (tag missing) 80 | if parsed_summary == cleaned_response and '' not in cleaned_response: 81 | log_to_file(f"Error: Summary {i} ({item_source_id}) parsing failed - tag missing.") 82 | summary = f"Error: Could not parse summary {i} ({item_source_id}) ( tag missing)" 83 | elif not parsed_summary: 84 | log_to_file(f"Error: Summary {i} ({item_source_id}) parsing failed - No content found in tag.") 85 | summary = f"Error: Could not parse summary {i} ({item_source_id}) (empty tag)" 86 | else: 87 | summary = parsed_summary # Use parsed summary 88 | 89 | # Extract score robustly 90 | score_match = re.search(r'(\d{1,2})', cleaned_response, re.IGNORECASE) 91 | if score_match: 92 | try: 93 | parsed_score = int(score_match.group(1)) 94 | if 0 <= parsed_score <= 10: 95 | score = parsed_score 96 | successful_summaries += 1 # Count success only if score is valid 97 | else: 98 | log_to_file(f"Warning: Summary {i} ({item_source_id}) score '{parsed_score}' out of range (0-10). Using -1.") 99 | except ValueError: 100 | log_to_file(f"Warning: Could not parse summary {i} ({item_source_id}) score '{score_match.group(1)}'. Using -1.") 101 | else: 102 | log_to_file(f"Warning: Could not find/parse tag for summary {i} ({item_source_id}). Using -1.") 103 | 104 | else: # API call itself failed 105 | log_to_file(f"Error: API call failed for Summary_{i} ({item_source_id}). Raw response was empty.") 106 | summary = f"Error: Could not summarize text piece {i} ({item_source_id}) (API call failed or timed out)" 107 | 108 | # Add summary and score along with type and source identifier 109 | summary_details = {"type": item_type, "source_id": item_source_id, 'summary': summary, 'score': score} 110 | summaries_with_scores.append(summary_details) 111 | 112 | # Save the summary text to archive regardless of score validity 113 | if run_archive_dir: 114 | # Create a more descriptive filename 115 | safe_source_id = re.sub(r'[\\/*?:"<>|]', "_", str(item_source_id)) # Sanitize filename chars 116 | summary_filename = os.path.join(run_archive_dir, f"summary_{i}_{item_type}_{safe_source_id[:50]}.txt") # Truncate long paths 117 | try: 118 | with open(summary_filename, 'w', encoding='utf-8') as sf: 119 | sf.write(f"Source: {item_source_id}\nType: {item_type}\nScore: {score}\n\n{summary}") 120 | except IOError as e: 121 | log_to_file(f"Warning: Could not save summary {i} ({item_source_id}) to file {summary_filename}: {e}") 122 | 123 | 124 | # Final status update 125 | print(f"\rSummarization & Scoring complete. Generated {successful_summaries}/{total_pieces} summaries successfully (with valid scores).") 126 | log_to_file(f"Summarization phase complete. 
Successful summaries (with score): {successful_summaries}/{total_pieces}") 127 | return summaries_with_scores -------------------------------------------------------------------------------- /functions/processing/youtube_descriptor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json # Used for logging raw response 4 | 5 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 6 | from ..utils import log_to_file, clean_thinking_tags # Import utilities 7 | 8 | def generate_youtube_description(report_content, topic, config, args): 9 | """Uses AI to generate a YouTube-friendly description based on an existing report.""" 10 | # Access run_archive_dir from the global scope via utils 11 | from ..utils import run_archive_dir 12 | 13 | print("\nGenerating YouTube description via AI...") 14 | log_to_file(f"Starting YouTube description generation. Topic: {topic}") 15 | 16 | # Check if we have report content to work with 17 | if not report_content: 18 | print("Error: No report content provided for YouTube description generation.") 19 | log_to_file("YouTube Desc Error: No report content provided.") 20 | return None 21 | 22 | guidance_text = f"\n**Additional Guidance:** {args.guidance}\n" if args.guidance else "" 23 | prompt = ( 24 | f"You are an AI assistant tasked with creating a YouTube video description based on an existing comprehensive report about '{topic}'.{guidance_text}\n" 25 | f"**Topic:** {topic}\n" 26 | f"{guidance_text}\n" 27 | f"**Task:**\n" 28 | f"Transform the provided comprehensive report into a YouTube description-friendly format. The output should be:\n" 29 | f"- Concise and scannable with bullet points and clear sections\n" 30 | f"- Simplified technical terms for general audience\n" 31 | f"- Key information organized by categories/sections\n" 32 | f"- Quick reference format suitable for video notes\n" 33 | f"- Actionable insights and specific recommendations\n" 34 | f"- YouTube-compatible plain text formatting (NO MARKDOWN)\n\n" 35 | f"Structure the YouTube description with:\n" 36 | f"1. Brief intro explaining what the topic covers\n" 37 | f"2. Key points organized in bullet format\n" 38 | f"3. Main categories/sections from the report\n" 39 | f"4. Quick takeaways and recommendations\n" 40 | f"5. Simplified explanations of technical concepts\n\n" 41 | f"IMPORTANT FORMATTING RULES FOR YOUTUBE:\n" 42 | f"- Use UPPERCASE for section headers (not ## or ###)\n" 43 | f"- Use • or - for bullet points (not * in markdown)\n" 44 | f"- NO bold (**text**), italics (*text*), or other markdown formatting\n" 45 | f"- Use emojis for visual appeal and section separation\n" 46 | f"- Use line breaks and spacing for readability\n" 47 | f"- Plain text only - YouTube descriptions don't support markdown\n\n" 48 | f"**Source Report Content:**\n" 49 | f"---\n{report_content}\n---\n\n" 50 | f"CRITICAL FORMATTING RULES (OUTPUT MUST FOLLOW EXACTLY):\n" 51 | f"1. OUTPUT TAG: You MUST enclose the entire YouTube description content within a single pair of `` tags.\n" 52 | f"2. CONTENT: Use PLAIN TEXT formatting only - YouTube does NOT support markdown.\n" 53 | f"3. STYLE: Write in a friendly, accessible tone suitable for YouTube audience.\n" 54 | f"4. LENGTH: Keep it comprehensive but scannable - aim for detailed notes that viewers can quickly reference.\n" 55 | f"5. FORMATTING: Use UPPERCASE for headers, • or - for bullets, emojis for visual appeal, and line breaks for spacing.\n" 56 | f"6. 
NO MARKDOWN: Absolutely no **bold**, *italic*, ##headers##, or other markdown syntax.\n" 57 | f"7. NO EXTRA TEXT: ONLY include the YouTube description text inside the `` tags. ABSOLUTELY NO other text, introductory phrases, explanations, or thinking tags should be present anywhere in the final output.\n\n" 58 | f"Remember: The entire output MUST be ONLY the YouTube description text enclosed in a single `` tag. Use PLAIN TEXT formatting only - no markdown syntax whatsoever since YouTube descriptions don't support it." 59 | ) 60 | 61 | # Save YouTube description prompt 62 | if run_archive_dir: 63 | prompt_filename = os.path.join(run_archive_dir, "youtube_description_prompt.txt") 64 | try: 65 | with open(prompt_filename, 'w', encoding='utf-8') as pf: 66 | pf.write(prompt) 67 | log_to_file(f"Saved YouTube description prompt to {prompt_filename}") 68 | except IOError as e: 69 | log_to_file(f"Warning: Could not save YouTube description prompt: {e}") 70 | 71 | # Call AI 72 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name="YouTubeDescriptionGeneration", timeout=args.ai_timeout, retries=args.ai_retries) 73 | 74 | # Save raw response 75 | if run_archive_dir and raw_response: 76 | raw_resp_filename = os.path.join(run_archive_dir, "youtube_description_response_raw.txt") 77 | try: 78 | with open(raw_resp_filename, 'w', encoding='utf-8') as rf: 79 | rf.write(raw_response) 80 | log_to_file(f"Saved YouTube description raw response to {raw_resp_filename}") 81 | except IOError as e: 82 | log_to_file(f"Warning: Could not save YouTube description raw response: {e}") 83 | 84 | if not cleaned_response: 85 | print("\nError: Failed to generate YouTube description from AI (empty cleaned response).") 86 | log_to_file("YouTube Desc Error: Failed (empty cleaned response).") 87 | return None 88 | 89 | # Parse the response - Find last tag after cleaning tags 90 | cleaned_text_for_youtube = clean_thinking_tags(cleaned_response) 91 | youtube_description_text = None 92 | if cleaned_text_for_youtube: 93 | last_opening_tag_index = cleaned_text_for_youtube.rfind('') 94 | if last_opening_tag_index != -1: 95 | closing_tag_index = cleaned_text_for_youtube.find('', last_opening_tag_index) 96 | if closing_tag_index != -1: 97 | start_content = last_opening_tag_index + len('') 98 | youtube_description_text = cleaned_text_for_youtube[start_content:closing_tag_index].strip() 99 | 100 | if not youtube_description_text: # Check if parsing failed or resulted in empty string 101 | print("\nError: Could not parse valid content from the AI response.") 102 | log_to_file(f"YouTube Desc Error: Failed to parse tag or content was empty.\nCleaned Response was:\n{cleaned_text_for_youtube}") 103 | # Save the failed YouTube description output for debugging 104 | if run_archive_dir: 105 | failed_youtube_path = os.path.join(run_archive_dir, "youtube_description_FAILED_PARSE.txt") 106 | try: 107 | with open(failed_youtube_path, 'w', encoding='utf-8') as fyf: 108 | fyf.write(cleaned_text_for_youtube or "Original cleaned response was empty.") 109 | except IOError: 110 | pass 111 | return None 112 | 113 | # Save the YouTube description 114 | final_youtube_filename = "youtube_description.md" 115 | final_youtube_filepath = os.path.join(run_archive_dir, final_youtube_filename) if run_archive_dir else final_youtube_filename 116 | 117 | try: 118 | with open(final_youtube_filepath, 'w', encoding='utf-8') as ef: 119 | ef.write(youtube_description_text) 120 | print(f"Saved generated YouTube description to {final_youtube_filepath}") 121 | 
log_to_file(f"YouTube description saved to {final_youtube_filepath}") 122 | return final_youtube_filepath 123 | except IOError as e: 124 | print(f"\nError: Could not save generated YouTube description to {final_youtube_filepath}: {e}") 125 | log_to_file(f"YouTube Desc Saving Error: Failed to save YouTube description to {final_youtube_filepath}: {e}") 126 | # Try saving to CWD as fallback ONLY if archive failed 127 | if run_archive_dir: 128 | try: 129 | cwd_filename = final_youtube_filename 130 | with open(cwd_filename, 'w', encoding='utf-8') as ef_cwd: 131 | ef_cwd.write(youtube_description_text) 132 | print(f"Saved generated YouTube description to {cwd_filename} (in CWD as fallback)") 133 | log_to_file(f"YouTube description saved to CWD fallback: {cwd_filename}") 134 | return cwd_filename 135 | except IOError as e_cwd: 136 | print(f"\nError: Could not save YouTube description to CWD fallback path either: {e_cwd}") 137 | log_to_file(f"YouTube Desc Saving Error: Failed to save YouTube description to CWD fallback: {e_cwd}") 138 | return None 139 | else: 140 | return None -------------------------------------------------------------------------------- /templates/main_dashboard.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Ecne AI Podcaster Control Panel 7 | 8 | 9 | 10 |
[Template text lost during extraction: only the text nodes of main_dashboard.html survive. Recoverable structure: a page titled "Ecne AI Podcaster Control Panel" with the tagline "Welcome to your centralized hub for generating podcast scripts and audio/video content.", navigation markup whose text was stripped (original template lines ~13-34), and a "🐳 Orpheus TTS Service Status" panel whose indicator initially reads "Checking..." and is updated by inline JavaScript (original template lines ~49-196).]
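The dashboard's status panel implies a lightweight reachability check against the Orpheus-FastAPI server. A hedged sketch of such a probe is shown below, using the default host and port from functions/tts/args.py; the probed path is an assumption, not an endpoint documented in this repository.

import requests

def orpheus_is_up(host: str = "127.0.0.1", port: int = 5005, timeout: float = 3.0) -> bool:
    """Return True if the Orpheus-FastAPI server answers at all (assumed probe URL)."""
    try:
        response = requests.get(f"http://{host}:{port}/", timeout=timeout)
        return response.status_code < 500
    except requests.exceptions.RequestException:
        return False

if __name__ == "__main__":
    print("Orpheus TTS service reachable:", orpheus_is_up())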
48 | 49 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /functions/processing/report_generation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import json # Used for logging raw response 4 | 5 | from ..ai import call_ai_api # Import call_ai_api from the new ai module 6 | from ..utils import log_to_file, clean_thinking_tags # Import utilities 7 | 8 | def generate_report(summaries_with_scores, reference_docs_content, topic, config, args): 9 | """Uses AI to generate a written report/paper based on summaries and optionally full reference docs.""" 10 | # Access run_archive_dir from the global scope via utils 11 | from ..utils import run_archive_dir 12 | 13 | print("\nGenerating report via AI...") 14 | log_to_file(f"Starting report generation. Topic: {topic}") 15 | 16 | # --- Process Summaries --- 17 | # Use all valid summaries (which might include summarized ref docs), sorted by score 18 | valid_summaries = [s for s in summaries_with_scores if s['score'] >= 0 and not s['summary'].startswith("Error:")] 19 | num_summaries_used = 0 20 | combined_summaries_text = "No valid summaries were generated or met the criteria." 21 | 22 | if valid_summaries: 23 | top_summaries = sorted(valid_summaries, key=lambda x: x['score'], reverse=True) 24 | num_summaries_used = len(top_summaries) 25 | print(f"Using {num_summaries_used} summaries for report generation.") 26 | log_to_file(f"Report Gen: Using {num_summaries_used} valid summaries.") 27 | combined_summaries_text = "\n\n".join([ 28 | # Include source info in the report prompt context as well 29 | f"Summary {i+1} (Source: {s['source_id']}, Type: {s['type']}, Score: {s['score']}):\n{s['summary']}" 30 | for i, s in enumerate(top_summaries) 31 | ]) 32 | else: 33 | print("Warning: No valid summaries available for report generation.") 34 | log_to_file("Report Gen Warning: No valid summaries found.") 35 | # We might still proceed if full reference docs are available 36 | 37 | # --- Process Full Reference Documents (If Not Summarized) --- 38 | full_reference_docs_text = "" 39 | num_ref_docs_used = 0 40 | if reference_docs_content and not args.reference_docs_summarize: 41 | num_ref_docs_used = len(reference_docs_content) 42 | print(f"Including {num_ref_docs_used} full reference documents directly in the report prompt.") 43 | log_to_file(f"Report Gen: Including {num_ref_docs_used} full reference documents.") 44 | full_reference_docs_text = "\n\n---\n\n".join([ 45 | f"Reference Document (Path: {doc['path']}):\n{doc['content']}" 46 | for doc in reference_docs_content 47 | ]) 48 | # Add a header for clarity in the prompt 49 | full_reference_docs_text = f"**Full Reference Documents (Use for context):**\n---\n{full_reference_docs_text}\n---" 50 | 51 | # Check if we have *any* content to generate from 52 | if num_summaries_used == 0 and num_ref_docs_used == 0: 53 | print("Error: No summaries or reference documents available to generate report.") 54 | log_to_file("Report Gen Error: No summaries or reference documents available for context.") 55 | return None # Cannot generate report without context 56 | 57 | guidance_text = f"\n**Additional Guidance:** {args.guidance}\n" if args.guidance else "" 58 | prompt = ( 59 | f"You are an AI assistant tasked with writing a well-structured, informative research paper/report on the topic: '{topic}'.{guidance_text}\n" 60 | f"**Topic:** {topic}\n" 61 | f"{guidance_text}\n" # Add guidance here as well for clarity 62 | 
f"**Task:**\n" 63 | f"Generate a comprehensive, well-structured, and informative research paper/report based *thoroughly* on the provided context (summaries and/or full reference documents). Synthesize the information, identify key themes, arguments, evidence, and supporting details (including specific statistics, names, dates, or benchmarks mentioned). Structure the report logically with an introduction (defining the topic and scope), body paragraphs (each exploring a specific facet or theme derived from the context, citing evidence implicitly), and a conclusion (summarizing key findings and potential implications or future directions). Maintain an objective, formal, and informative tone suitable for a research report. **Crucially, this must be a written report/essay format, NOT a script or dialogue.**\n\n" 64 | f"**Context for Report Generation (Analyze ALL):**\n\n" 65 | f"--- Summaries (Analyze these first) ---\n{combined_summaries_text}\n---\n\n" 66 | f"{full_reference_docs_text}\n\n" # This will be empty if no full docs were used 67 | f"**CRITICAL FORMATTING RULES (OUTPUT MUST FOLLOW EXACTLY):**\n" 68 | f"1. **OUTPUT TAG:** You MUST enclose the *entire* report content within a single pair of `` tags.\n" 69 | f"2. **CONTENT:** The content should be well-written, coherent, and directly based on the provided summaries.\n" 70 | f"3. **NO EXTRA TEXT:** ONLY include the report text inside the `` tags. **ABSOLUTELY NO** other text, introductory phrases, explanations, or thinking tags (`...`) should be present anywhere in the final output.\n\n" 71 | f"Remember: The entire output MUST be ONLY the report text enclosed in a single `` tag." 72 | ) 73 | 74 | # Save report prompt 75 | if run_archive_dir: 76 | prompt_filename = os.path.join(run_archive_dir, "report_prompt.txt") 77 | try: 78 | with open(prompt_filename, 'w', encoding='utf-8') as pf: pf.write(prompt) 79 | log_to_file(f"Saved report prompt to {prompt_filename}") 80 | except IOError as e: log_to_file(f"Warning: Could not save report prompt: {e}") 81 | 82 | # Call AI 83 | raw_response, cleaned_response = call_ai_api(prompt, config, tool_name="ReportGeneration", timeout=args.ai_timeout, retries=args.ai_retries) 84 | 85 | # Save raw response 86 | if run_archive_dir and raw_response: 87 | raw_resp_filename = os.path.join(run_archive_dir, "report_response_raw.txt") 88 | try: 89 | with open(raw_resp_filename, 'w', encoding='utf-8') as rf: rf.write(raw_response) 90 | log_to_file(f"Saved report raw response to {raw_resp_filename}") 91 | except IOError as e: log_to_file(f"Warning: Could not save report raw response: {e}") 92 | 93 | if not cleaned_response: 94 | print("\nError: Failed to generate report from AI (empty cleaned response).") 95 | log_to_file("Report Gen Error: Failed (empty cleaned response).") 96 | return None 97 | 98 | # Parse the response - Find last tag after cleaning tags 99 | cleaned_text_for_report = clean_thinking_tags(cleaned_response) 100 | report_text = None 101 | if cleaned_text_for_report: 102 | last_opening_tag_index = cleaned_text_for_report.rfind('') 103 | if last_opening_tag_index != -1: 104 | closing_tag_index = cleaned_text_for_report.find('', last_opening_tag_index) 105 | if closing_tag_index != -1: 106 | start_content = last_opening_tag_index + len('') 107 | report_text = cleaned_text_for_report[start_content:closing_tag_index].strip() 108 | 109 | if not report_text: # Check if parsing failed or resulted in empty string 110 | print("\nError: Could not parse valid content from the AI response.") 111 | 
log_to_file(f"Report Gen Error: Failed to parse tag or content was empty.\nCleaned Response was:\n{cleaned_text_for_report}") 112 | # Save the failed report output for debugging 113 | if run_archive_dir: 114 | failed_report_path = os.path.join(run_archive_dir, "report_FAILED_PARSE.txt") 115 | try: 116 | with open(failed_report_path, 'w', encoding='utf-8') as frf: frf.write(cleaned_text_for_report or "Original cleaned response was empty.") 117 | except IOError: pass 118 | return None 119 | 120 | # Save the report 121 | final_report_filename = "podcast_report.txt" 122 | final_report_filepath = os.path.join(run_archive_dir, final_report_filename) if run_archive_dir else final_report_filename 123 | 124 | try: 125 | with open(final_report_filepath, 'w', encoding='utf-8') as ef: 126 | ef.write(report_text) 127 | print(f"Saved generated report to {final_report_filepath}") 128 | log_to_file(f"Report saved to {final_report_filepath}") 129 | return final_report_filepath 130 | except IOError as e: 131 | print(f"\nError: Could not save generated report to {final_report_filepath}: {e}") 132 | log_to_file(f"Report Saving Error: Failed to save report to {final_report_filepath}: {e}") 133 | # Try saving to CWD as fallback ONLY if archive failed 134 | if run_archive_dir: 135 | try: 136 | cwd_filename = final_report_filename 137 | with open(cwd_filename, 'w', encoding='utf-8') as ef_cwd: ef_cwd.write(report_text) 138 | print(f"Saved generated report to {cwd_filename} (in CWD as fallback)") 139 | log_to_file(f"Report saved to CWD fallback: {cwd_filename}") 140 | return cwd_filename 141 | except IOError as e_cwd: 142 | print(f"\nError: Could not save report to CWD fallback path either: {e_cwd}") 143 | log_to_file(f"Report Saving Error: Failed to save report to CWD fallback: {e_cwd}") 144 | return None 145 | else: 146 | return None 147 | -------------------------------------------------------------------------------- /templates/podcast_builder_form.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Podcast Audio/Video Generator 7 | 8 | 9 | 10 |
11 |

Podcast Audio/Video Generator

12 | 13 | 16 | 17 |
18 | 19 |

Script Input

20 |
21 | 22 | 25 |
26 | 30 | 31 |

Voice Settings

32 |
33 | 34 | 35 |
36 |
37 | 38 | 39 |
40 |
41 | 42 | 43 |
44 |
45 | 46 | 47 |
48 |
49 | 50 | 51 |
52 | 53 | 54 |
55 |

Quality Presets

56 |
57 | 58 | 64 |
65 | 66 |

API Settings

67 |
68 | 69 | 70 |
71 |
72 | 73 | 74 |
75 |
76 | 77 | 78 |
79 |
80 | 81 | 82 |
83 | 84 |

Output & Video Settings

85 |
86 | 87 | 88 |
89 |
90 | 91 | 96 |
97 |
98 | 99 | 106 |
107 |
108 | 109 | 110 |
111 |
112 | 113 | 114 |
115 |
116 | 117 | 122 |
123 |
124 | 125 | 130 |
131 |
132 | 133 | 138 |
139 |
140 | 141 | 142 |
143 |
144 | 145 | 146 |
147 |
148 | 149 | 150 | 151 |
152 | 153 |
154 |

Or Resume Existing Podcast

155 |
156 | 159 | 160 |
161 |
162 | 163 | 168 |
169 | 170 | 171 | 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /functions/tts/processing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess # Added for subprocess.run 3 | import shlex 4 | import tempfile 5 | import shutil 6 | import numpy as np 7 | import soundfile as sf 8 | 9 | try: 10 | from pydub import AudioSegment 11 | pydub_available = True 12 | except ImportError: 13 | print("Warning: 'pydub' library not found. Audio manipulation (gain, trim, padding) disabled.") 14 | pydub_available = False 15 | 16 | def apply_audio_enhancements(audio_path, config, temp_dir): 17 | """ 18 | Applies FFmpeg enhancements (noise reduction, compression, normalization, de-essing) 19 | and pydub processing (gain, trim, padding) to an audio file. 20 | 21 | Args: 22 | audio_path (str): Path to the input audio file. 23 | config (dict): Dictionary containing processing parameters (e.g., nr_level, gain_factor, etc.). 24 | temp_dir (str): Path to a temporary directory for intermediate files. 25 | 26 | Returns: 27 | tuple: (path_to_processed_file, samplerate) or (None, None) on failure. 28 | """ 29 | processed_audio_path = audio_path 30 | samplerate = None 31 | 32 | if not os.path.exists(audio_path): 33 | print(f"Error: Input audio file not found for processing: {audio_path}") 34 | return None, None 35 | 36 | # Get samplerate from the initial audio file 37 | try: 38 | info = sf.info(audio_path) 39 | samplerate = info.samplerate 40 | except Exception as e: 41 | print(f"Error getting samplerate from {audio_path}: {e}") 42 | return None, None 43 | 44 | # --- FFmpeg Enhancement (Conditional) --- 45 | final_apply_ffmpeg = config.get('apply_ffmpeg_enhancement', True) 46 | final_apply_deesser = config.get('apply_deesser', True) # Default ON 47 | final_deesser_freq = config.get('deesser_freq', 3000) 48 | final_nr_level = config.get('nr_level', 0) 49 | final_compress_thresh = config.get('compress_thresh', 1.0) 50 | final_compress_ratio = config.get('compress_ratio', 1) 51 | final_norm_frame_len = config.get('norm_frame_len', 10) 52 | final_norm_gauss_size = config.get('norm_gauss_size', 3) 53 | 54 | ffmpeg_temp_path = None 55 | 56 | if final_apply_ffmpeg: 57 | # Ensure final norm_gauss_size is odd 58 | if final_norm_gauss_size % 2 == 0: 59 | final_norm_gauss_size -= 1 60 | print(f" Adjusting Norm Gauss size from {final_norm_gauss_size + 1} to {final_norm_gauss_size} (must be odd).") 61 | 62 | ffmpeg_temp_fd, ffmpeg_temp_path = tempfile.mkstemp(suffix="_ffmpeg.wav", prefix="segment_", dir=temp_dir) 63 | os.close(ffmpeg_temp_fd) 64 | 65 | try: 66 | filter_chain = [] 67 | 68 | if final_apply_deesser: 69 | filter_chain.append(f"firequalizer=gain='if(gte(f,{final_deesser_freq}),-5,0)'") 70 | 71 | if final_nr_level > 0: 72 | filter_chain.append(f"afftdn=nr={final_nr_level}") 73 | 74 | comp_thresh_str = f"{final_compress_thresh:.3f}" 75 | filter_chain.append(f"acompressor=threshold={comp_thresh_str}:ratio={final_compress_ratio}:attack=10:release=100") 76 | 77 | filter_chain.append(f"dynaudnorm=f={final_norm_frame_len}:g={final_norm_gauss_size}") 78 | 79 | audio_filter = ','.join(filter_chain) 80 | 81 | ffmpeg_command = [ 82 | 'ffmpeg', 83 | '-i', audio_path, 84 | '-af', audio_filter, 85 | '-y', 86 | ffmpeg_temp_path 87 | ] 88 | print(f" Attempting FFmpeg enhancement: {' '.join(shlex.quote(arg) for arg in ffmpeg_command)}") 89 | result = subprocess.run(ffmpeg_command, capture_output=True, 
text=True, check=False) 90 | 91 | if result.returncode == 0 and os.path.exists(ffmpeg_temp_path) and os.path.getsize(ffmpeg_temp_path) > 44: 92 | processed_audio_path = ffmpeg_temp_path 93 | print(f" -> SUCCESS: FFmpeg enhancement saved to: {os.path.basename(ffmpeg_temp_path)}") 94 | else: 95 | print(f" !! Warning: FFmpeg processing failed or produced empty file. Using original audio.") 96 | print(f" Return Code: {result.returncode}") 97 | print(f" Stderr: {result.stderr.strip()}") 98 | if os.path.exists(ffmpeg_temp_path): 99 | try: os.remove(ffmpeg_temp_path) 100 | except OSError: pass 101 | ffmpeg_temp_path = None 102 | except FileNotFoundError: 103 | print(f" !! Error: 'ffmpeg' command not found. Skipping enhancement.") 104 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 105 | try: os.remove(ffmpeg_temp_path) 106 | except OSError: pass 107 | ffmpeg_temp_path = None 108 | except Exception as ffmpeg_e: 109 | print(f" !! Warning: Error running FFmpeg processing: {ffmpeg_e}. Skipping enhancement.") 110 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 111 | try: os.remove(ffmpeg_temp_path) 112 | except OSError: pass 113 | ffmpeg_temp_path = None 114 | else: 115 | print(" -> Skipping FFmpeg enhancement as requested.") 116 | 117 | # --- Pydub Processing (Gain, Trim, Pad) --- 118 | final_gain_factor = config.get('gain_factor', 1.0) 119 | final_trim_end_ms = config.get('trim_end_ms', 0) 120 | pad_end_ms = config.get('pad_end_ms', 0) 121 | 122 | if pydub_available: 123 | try: 124 | print(f" Processing with pydub (Gain, Trim, Pad) on: {os.path.basename(processed_audio_path)}...") 125 | segment = AudioSegment.from_wav(processed_audio_path) 126 | samplerate = segment.frame_rate 127 | 128 | if final_gain_factor != 1.0 and final_gain_factor > 0: 129 | print(f" -> Applying gain: {final_gain_factor:.2f}x") 130 | gain_db = 20 * np.log10(final_gain_factor) 131 | segment = segment + gain_db 132 | 133 | if final_trim_end_ms > 0 and len(segment) > final_trim_end_ms: 134 | print(f" -> Trimming {final_trim_end_ms}ms from end.") 135 | segment = segment[:-final_trim_end_ms] 136 | elif final_trim_end_ms > 0: 137 | print(f" -> Warning: Segment length ({len(segment)}ms) is less than trim duration ({final_trim_end_ms}ms). Skipping trim.") 138 | 139 | if pad_end_ms > 0: 140 | print(f" -> Padding {pad_end_ms}ms silence to end.") 141 | padding = AudioSegment.silent(duration=pad_end_ms, frame_rate=samplerate) 142 | segment = segment + padding 143 | else: 144 | print(f" -> No end padding requested (pad_end_ms={pad_end_ms}).") 145 | 146 | # Create a new temp file for the final pydub output 147 | pydub_temp_fd, pydub_temp_path = tempfile.mkstemp(suffix="_pydub.wav", prefix="segment_", dir=temp_dir) 148 | os.close(pydub_temp_fd) 149 | 150 | print(f" -> Exporting final processed audio to {os.path.basename(pydub_temp_path)}") 151 | segment.export(pydub_temp_path, format="wav") 152 | duration = len(segment) / 1000.0 153 | print(f" -> Final segment saved ({duration:.2f}s, SR: {samplerate} Hz)") 154 | 155 | # Cleanup the intermediate FFmpeg file if it was created 156 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 157 | try: os.remove(ffmpeg_temp_path) 158 | except OSError as e: print(f" Warning: Could not remove ffmpeg temp file {ffmpeg_temp_path}: {e}") 159 | 160 | return pydub_temp_path, samplerate 161 | 162 | except Exception as pydub_e: 163 | print(f"!! Error during pydub processing: {pydub_e}") 164 | print(f"!! 
Falling back to using the pre-pydub audio: {os.path.basename(processed_audio_path)}") 165 | # If pydub fails, try to copy the ffmpeg/initial file to the final path 166 | try: 167 | final_fd, final_path_on_error = tempfile.mkstemp(suffix="_final_fallback.wav", prefix="segment_", dir=temp_dir) 168 | os.close(final_fd) 169 | shutil.copy2(processed_audio_path, final_path_on_error) 170 | # Need to get samplerate if we didn't get it from pydub 171 | if samplerate is None: 172 | with sf.SoundFile(final_path_on_error) as audio_info: 173 | samplerate = audio_info.samplerate 174 | 175 | # Cleanup the intermediate FFmpeg file if it was created 176 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 177 | try: os.remove(ffmpeg_temp_path) 178 | except OSError as e: print(f" Warning: Could not remove ffmpeg temp file {ffmpeg_temp_path}: {e}") 179 | 180 | return final_path_on_error, samplerate 181 | except Exception as copy_e: 182 | print(f"!! Error copying fallback audio: {copy_e}") 183 | return None, None 184 | else: 185 | print("!! Pydub not available. Skipping gain, trim, and padding.") 186 | # If pydub is not available, the processed_audio_path (from FFmpeg or initial) is the final one. 187 | # We should copy it to a new temp file to ensure it's not the original input file. 188 | try: 189 | final_fd, final_path_no_pydub = tempfile.mkstemp(suffix="_final_nopydub.wav", prefix="segment_", dir=temp_dir) 190 | os.close(final_fd) 191 | shutil.copy2(processed_audio_path, final_path_no_pydub) 192 | 193 | # Cleanup the intermediate FFmpeg file if it was created 194 | if ffmpeg_temp_path and os.path.exists(ffmpeg_temp_path): 195 | try: os.remove(ffmpeg_temp_path) 196 | except OSError as e: print(f" Warning: Could not remove ffmpeg temp file {ffmpeg_temp_path}: {e}") 197 | 198 | return final_path_no_pydub, samplerate 199 | except Exception as copy_e: 200 | print(f"!! Error copying non-pydub audio: {copy_e}") 201 | return None, None -------------------------------------------------------------------------------- /functions/scraping/documents.py: -------------------------------------------------------------------------------- 1 | import os 2 | import PyPDF2 # For PDF processing 3 | import docx # For DOCX processing 4 | 5 | from ..utils import log_to_file # Import log_to_file from the utils module 6 | 7 | def load_reference_documents(args): 8 | """ 9 | Loads content from specified reference documents (txt, pdf, docx) or from a folder. 10 | Returns a list of dictionaries with 'path' and 'content'. 
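    Example (illustrative sketch; the file names below are hypothetical, and the
    args object only needs the two attributes this function reads):

        from types import SimpleNamespace
        args = SimpleNamespace(
            reference_docs="notes.txt,paper.pdf",                  # comma-separated paths, or None
            reference_docs_folder="research/Example_Docs_Folder",  # folder of .txt/.pdf/.docx files, or None
        )
        docs = load_reference_documents(args)
        # docs -> [{"path": "notes.txt", "content": "..."}, ...]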
11 | """ 12 | reference_docs_content = [] 13 | processed_paths = set() # To avoid processing the same file twice if specified by both args 14 | 15 | # --- Load Reference Documents from comma-separated paths --- 16 | if args.reference_docs: 17 | print("\nLoading reference documents from paths...") 18 | log_to_file(f"Attempting to load reference documents from paths: {args.reference_docs}") 19 | ref_doc_paths = [p.strip() for p in args.reference_docs.split(',') if p.strip()] 20 | for doc_path in ref_doc_paths: 21 | full_doc_path = os.path.abspath(doc_path) # Get absolute path for consistent tracking 22 | if full_doc_path in processed_paths: 23 | print(f" - Skipping already processed document: {doc_path}") 24 | log_to_file(f"Skipping already processed document: {doc_path}") 25 | continue 26 | 27 | content = None 28 | try: 29 | print(f" - Processing reference document: {doc_path}") 30 | if doc_path.lower().endswith('.pdf'): 31 | # PDF processing 32 | text_content = [] 33 | with open(doc_path, 'rb') as pdf_file: # Open in binary mode 34 | reader = PyPDF2.PdfReader(pdf_file) # Use PdfReader 35 | if reader.is_encrypted: 36 | print(f" - Warning: Skipping encrypted PDF: {doc_path}") 37 | log_to_file(f"Warning: Skipping encrypted PDF: {doc_path}") 38 | continue # Skip encrypted PDFs 39 | for page in reader.pages: 40 | page_text = page.extract_text() 41 | if page_text: # Ensure text was extracted 42 | text_content.append(page_text) 43 | content = "\n".join(text_content) 44 | print(f" - Extracted text from PDF.") 45 | elif doc_path.lower().endswith('.docx'): 46 | # DOCX processing 47 | doc = docx.Document(doc_path) 48 | text_content = [para.text for para in doc.paragraphs if para.text] # Filter empty paragraphs 49 | content = "\n".join(text_content) 50 | print(f" - Extracted text from DOCX.") 51 | else: # Assume plain text for .txt or unknown/other extensions 52 | if not doc_path.lower().endswith('.txt'): 53 | print(f" - Warning: Unknown extension for '{doc_path}', attempting to read as plain text.") 54 | log_to_file(f"Warning: Unknown extension for reference doc '{doc_path}', reading as text.") 55 | with open(doc_path, 'r', encoding='utf-8') as f: 56 | content = f.read() 57 | print(f" - Read as plain text.") 58 | 59 | # Process extracted content 60 | if content and content.strip(): 61 | reference_docs_content.append({"path": doc_path, "content": content.strip()}) 62 | processed_paths.add(full_doc_path) 63 | print(f" - Successfully loaded content ({len(content)} chars).") 64 | log_to_file(f"Loaded reference doc: {doc_path} ({len(content)} chars)") 65 | else: 66 | print(f" - Warning: No text content extracted or file is empty: {doc_path}") 67 | log_to_file(f"Warning: Reference document {doc_path} empty or no text extracted.") 68 | 69 | except FileNotFoundError: 70 | print(f" - Error: Reference document file not found: {doc_path}") 71 | log_to_file(f"Error: Reference document file not found: {doc_path}") 72 | except PyPDF2.errors.PdfReadError as pdf_err: # Catch specific PyPDF2 errors 73 | print(f" - Error reading PDF file {doc_path}: {pdf_err}") 74 | log_to_file(f"Error reading PDF file {doc_path}: {pdf_err}") 75 | except Exception as e: # General catch-all 76 | print(f" - Error processing reference document {doc_path}: {e}") 77 | log_to_file(f"Error processing reference document {doc_path}: {e} (Type: {type(e).__name__})") 78 | 79 | if not reference_docs_content and args.reference_docs: 80 | print("Warning: No valid reference documents were loaded from paths despite --reference-docs being set.") 81 | 
log_to_file("Warning: --reference-docs set, but no content loaded from paths.") 82 | 83 | 84 | # --- Load Reference Documents from Folder --- 85 | if args.reference_docs_folder: 86 | print(f"\nLoading reference documents from folder: {args.reference_docs_folder}") 87 | log_to_file(f"Attempting to load reference documents from folder: {args.reference_docs_folder}") 88 | if not os.path.isdir(args.reference_docs_folder): 89 | print(f" - Error: Provided path is not a valid directory: {args.reference_docs_folder}") 90 | log_to_file(f"Error: --reference-docs-folder path is not a directory: {args.reference_docs_folder}") 91 | else: 92 | for filename in os.listdir(args.reference_docs_folder): 93 | doc_path = os.path.join(args.reference_docs_folder, filename) 94 | full_doc_path = os.path.abspath(doc_path) # Get absolute path for consistent tracking 95 | 96 | if not os.path.isfile(doc_path): 97 | continue # Skip subdirectories 98 | 99 | if full_doc_path in processed_paths: 100 | print(f" - Skipping already processed document: {doc_path}") 101 | log_to_file(f"Skipping already processed document: {doc_path}") 102 | continue 103 | 104 | content = None 105 | file_ext = os.path.splitext(filename)[1].lower() 106 | 107 | try: 108 | print(f" - Processing reference document: {doc_path}") 109 | if file_ext == '.pdf': 110 | # PDF processing 111 | text_content = [] 112 | with open(doc_path, 'rb') as pdf_file: 113 | reader = PyPDF2.PdfReader(pdf_file) 114 | if reader.is_encrypted: 115 | print(f" - Warning: Skipping encrypted PDF: {doc_path}") 116 | log_to_file(f"Warning: Skipping encrypted PDF: {doc_path}") 117 | continue 118 | for page in reader.pages: 119 | page_text = page.extract_text() 120 | if page_text: 121 | text_content.append(page_text) 122 | content = "\n".join(text_content) 123 | print(f" - Extracted text from PDF.") 124 | elif file_ext == '.docx': 125 | # DOCX processing 126 | doc = docx.Document(doc_path) 127 | text_content = [para.text for para in doc.paragraphs if para.text] 128 | content = "\n".join(text_content) 129 | print(f" - Extracted text from DOCX.") 130 | elif file_ext == '.txt': 131 | # TXT processing 132 | with open(doc_path, 'r', encoding='utf-8') as f: 133 | content = f.read() 134 | print(f" - Read as plain text.") 135 | else: 136 | print(f" - Skipping unsupported file type: {filename}") 137 | log_to_file(f"Skipping unsupported file type in reference folder: {filename}") 138 | continue # Skip unsupported files 139 | 140 | # Process extracted content 141 | if content and content.strip(): 142 | reference_docs_content.append({"path": doc_path, "content": content.strip()}) 143 | processed_paths.add(full_doc_path) 144 | print(f" - Successfully loaded content ({len(content)} chars).") 145 | log_to_file(f"Loaded reference doc from folder: {doc_path} ({len(content)} chars)") 146 | else: 147 | print(f" - Warning: No text content extracted or file is empty: {doc_path}") 148 | log_to_file(f"Warning: Reference document {doc_path} from folder is empty or no text extracted.") 149 | 150 | except FileNotFoundError: # Should not happen with listdir unless race condition 151 | print(f" - Error: Reference document file not found unexpectedly: {doc_path}") 152 | log_to_file(f"Error: Reference document file not found unexpectedly: {doc_path}") 153 | except PyPDF2.errors.PdfReadError as pdf_err: 154 | print(f" - Error reading PDF file {doc_path}: {pdf_err}") 155 | log_to_file(f"Error reading PDF file {doc_path} from folder: {pdf_err}") 156 | except Exception as e: 157 | print(f" - Error processing 
reference document {doc_path}: {e}") 158 | log_to_file(f"Error processing reference document {doc_path} from folder: {e} (Type: {type(e).__name__})") 159 | 160 | log_to_file(f"Finished processing reference documents folder. Total loaded: {len(reference_docs_content)}") 161 | 162 | if not reference_docs_content and (args.reference_docs or args.reference_docs_folder): 163 | print("Warning: No valid reference documents were loaded from specified paths or folder.") 164 | log_to_file("Warning: Reference docs/folder specified, but no content loaded.") 165 | 166 | 167 | return reference_docs_content -------------------------------------------------------------------------------- /functions/scraping/reddit.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import urllib.parse 4 | import os # Import os module 5 | import time 6 | import random 7 | import urllib.parse 8 | from selenium import webdriver 9 | from selenium.webdriver.common.by import By 10 | from selenium.webdriver.chrome.service import Service as ChromeService 11 | from selenium.webdriver.support.ui import WebDriverWait 12 | from selenium.webdriver.support import expected_conditions as EC 13 | from selenium.common.exceptions import TimeoutException, NoSuchElementException 14 | 15 | from ..utils import log_to_file, USER_AGENTS # Import utilities 16 | 17 | def scrape_reddit_source(subreddit_name, search_queries, args, seen_urls_global, source_scrape_limit): 18 | """ 19 | Scrapes content from a specific subreddit using Selenium. 20 | Returns a list of scraped text content from posts/comments. 21 | Updates the seen_urls_global set. 22 | """ 23 | print(f" - Processing Reddit source '{subreddit_name}' using Selenium/old.reddit.com...") 24 | log_to_file(f"Initiating Selenium scrape for r/{subreddit_name}") 25 | driver = None 26 | all_post_links_for_subreddit = set() 27 | reddit_texts = [] # Store texts scraped from this source 28 | source_texts_count = 0 # Track count for this source 29 | 30 | # Determine the path to chromedriver within the virtual environment 31 | # This script is in Ecne-AI-Podcasterv2/functions/scraping/ 32 | # The venv is in Ecne-AI-Podcasterv2/host_venv/ 33 | # Chromedriver will be in Ecne-AI-Podcasterv2/host_venv/bin/ 34 | script_dir = os.path.dirname(os.path.abspath(__file__)) 35 | # Go up two directories (from functions/scraping to Ecne-AI-Podcasterv2) 36 | project_root = os.path.abspath(os.path.join(script_dir, '..', '..')) 37 | chromedriver_path = os.path.join(project_root, 'host_venv', 'bin', 'chromedriver') 38 | 39 | # Check if chromedriver exists at the expected path 40 | if not os.path.exists(chromedriver_path): 41 | print(f" - ERROR: Chromedriver not found at expected path: {chromedriver_path}") 42 | log_to_file(f"Selenium Skip: Chromedriver not found at {chromedriver_path}") 43 | return [] # Return empty list if chromedriver is not found 44 | 45 | try: 46 | options = webdriver.ChromeOptions() 47 | options.add_argument('--headless'); options.add_argument('--no-sandbox'); options.add_argument('--disable-dev-shm-usage') 48 | options.add_argument(f'user-agent={random.choice(USER_AGENTS)}') 49 | 50 | # Use ChromeService to specify the executable path 51 | service = ChromeService(executable_path=chromedriver_path) 52 | driver = webdriver.Chrome(service=service, options=options) 53 | 54 | wait = WebDriverWait(driver, 20) # Consider making timeout configurable 55 | print(" - Selenium WebDriver initialized using venv chromedriver.") 56 | 57 | # --- 
Perform Search for Each Keyword Query --- 58 | for query_idx, search_query in enumerate(search_queries): 59 | print(f" - Searching subreddit '{subreddit_name}' for query {query_idx+1}/{len(search_queries)}: '{search_query}'") 60 | try: 61 | encoded_query = urllib.parse.quote_plus(search_query) 62 | # Using old.reddit.com for potentially simpler structure 63 | search_url = f"https://old.reddit.com/r/{subreddit_name}/search?q={encoded_query}&restrict_sr=on&sort=relevance&t=all" 64 | print(f" - Navigating to search URL: {search_url}") 65 | driver.get(search_url) 66 | time.sleep(random.uniform(2, 4)) # Allow page to load 67 | 68 | print(" - Waiting for search results...") 69 | wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div.search-result-link, div.search-result"))) # General result container 70 | link_elements = driver.find_elements(By.CSS_SELECTOR, "a.search-title") # Titles usually link to posts 71 | print(f" - Found {len(link_elements)} potential result links for this query.") 72 | 73 | count = 0 74 | for link_element in link_elements: 75 | href = link_element.get_attribute('href') 76 | # Ensure it's a comments link and not already seen 77 | if href and '/comments/' in href and href not in all_post_links_for_subreddit: 78 | all_post_links_for_subreddit.add(href) 79 | count += 1 80 | print(f" - Added {count} new unique post links.") 81 | 82 | except TimeoutException: 83 | print(f" - Timeout waiting for search results for query: '{search_query}'") 84 | log_to_file(f"Selenium Timeout waiting for search results: r/{subreddit_name}, Query: '{search_query}'") 85 | except Exception as search_e: 86 | print(f" - Error extracting search results for query '{search_query}': {search_e}") 87 | log_to_file(f"Selenium Error extracting search results: r/{subreddit_name}, Query: '{search_query}': {search_e}") 88 | 89 | time.sleep(random.uniform(1, 2)) # Delay between searches 90 | 91 | # --- Scrape Collected Post Links --- 92 | unique_post_links = list(all_post_links_for_subreddit) 93 | print(f" - Total unique post links found across all queries for '{subreddit_name}': {len(unique_post_links)}") 94 | links_to_scrape = unique_post_links[:source_scrape_limit] # Apply limit on *posts* to scrape 95 | print(f" - Scraping top {len(links_to_scrape)} posts based on --max-reddit-results={source_scrape_limit}") 96 | 97 | if not links_to_scrape: 98 | print(" - No post links found to scrape for this subreddit.") 99 | 100 | for post_url in links_to_scrape: 101 | if post_url in seen_urls_global: 102 | print(f" - Skipping already scraped URL (globally): {post_url}") 103 | continue 104 | # Check limit *for this source* again (safe redundancy) 105 | if source_texts_count >= source_scrape_limit: 106 | print(f" - Reached post scrape limit ({source_scrape_limit}) for subreddit {subreddit_name}.") 107 | break # Stop scraping more posts for this subreddit 108 | 109 | print(f" - Navigating to post: {post_url}") 110 | try: 111 | driver.get(post_url) 112 | time.sleep(random.uniform(2, 4)) # Allow comments to load 113 | 114 | post_title = "N/A"; post_body = ""; comment_texts = [] 115 | # Extract Title (using old.reddit selector) 116 | try: 117 | title_element = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "p.title a.title"))) 118 | post_title = title_element.text.strip() 119 | except (TimeoutException, NoSuchElementException): print(" - Warning: Could not find post title.") 120 | 121 | # Extract Body (using old.reddit selector) 122 | try: 123 | body_elements = driver.find_elements(By.CSS_SELECTOR, 
"div.expando div.md") 124 | if body_elements: post_body = body_elements[0].text.strip() 125 | except NoSuchElementException: pass 126 | except Exception as body_e: print(f" - Warning: Error extracting post body: {body_e}") 127 | 128 | # Extract Comments (using old.reddit selector) 129 | try: 130 | comment_elements = driver.find_elements(By.CSS_SELECTOR, "div.commentarea .comment .md p") 131 | print(f" - Found {len(comment_elements)} comment paragraphs. Scraping top {args.max_reddit_comments}.") 132 | for comment_element in comment_elements[:args.max_reddit_comments]: # Use args limit here 133 | comment_text = comment_element.text.strip() 134 | if comment_text: # Avoid empty paragraphs 135 | comment_texts.append(comment_text) 136 | except NoSuchElementException: pass 137 | except Exception as comment_e: print(f" - Warning: Error extracting comments: {comment_e}") 138 | 139 | # Combine content 140 | # Extract permalink from post_url for logging/reference 141 | permalink = post_url # Use post_url as permalink for old reddit 142 | full_content = f"Source: Reddit (r/{subreddit_name})\nPermalink: {permalink}\nTitle: {post_title}\n\nBody:\n{post_body}\n\nComments:\n" + "\n---\n".join(comment_texts) 143 | content_length = len(full_content) 144 | min_length = 150 # Minimum chars to be considered valid content 145 | 146 | if content_length > min_length: 147 | reddit_texts.append(full_content.strip()) # Add to this source's list 148 | seen_urls_global.add(post_url) # Mark as scraped globally 149 | source_texts_count += 1 # Increment count for this source 150 | print(f" - Success: Scraped content from post ({content_length} chars).") 151 | log_to_file(f"Selenium scrape success: {post_url} ({content_length} chars)") 152 | else: 153 | print(f" - Warning: Scraped content ({content_length} chars) seems too short (min {min_length}). Skipping post.") 154 | log_to_file(f"Selenium scrape warning (too short): {post_url} ({content_length} chars)") 155 | 156 | except TimeoutException: 157 | print(f" - Timeout loading post page: {post_url}") 158 | log_to_file(f"Selenium Timeout loading post page: {post_url}") 159 | except Exception as post_e: 160 | print(f" - Error processing post page {post_url}: {post_e}") 161 | log_to_file(f"Selenium Error processing post page {post_url}: {post_e}") 162 | finally: 163 | time.sleep(random.uniform(1.5, 3)) # Delay between posts 164 | 165 | except Exception as selenium_e: 166 | print(f" - An error occurred during Selenium processing for r/{subreddit_name}: {selenium_e}") 167 | log_to_file(f"Selenium Error processing source r/{subreddit_name}: {selenium_e}") 168 | finally: 169 | if driver: 170 | print(" - Quitting Selenium WebDriver.") 171 | driver.quit() 172 | 173 | print(f" - Finished processing Reddit source r/{subreddit_name}. Scraped {source_texts_count} piece(s).") 174 | return reddit_texts -------------------------------------------------------------------------------- /templates/script_builder_form.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | AI Podcast Generator 7 | 8 | 9 | 10 |
11 |

AI Podcast Generator

12 | 13 | 16 | 17 | 18 | 19 |
20 |
21 | 22 | 23 |
24 | 25 |
26 | 27 | 28 |
29 | 30 |
31 | 32 | 33 |
34 | 35 |
36 | 37 | 41 |
42 | 43 |
44 | 45 | 49 |
50 |
51 | 52 | 53 |
54 | 55 |
56 | Additional Options 57 | 58 |
59 | 60 | 61 |
62 | 63 |
64 | 65 | 66 |
67 | 68 |
69 | 70 | 71 |
72 | 73 |
74 | 75 | 76 |
77 | 78 |
79 | 80 | 81 |
82 | 83 |
84 | 85 | 86 |
87 | 88 |
89 | 90 | 91 |
92 | 93 |
94 | 95 | 96 |
97 | 98 |
99 | 100 | 101 |
102 | 103 |
104 | 105 | 106 |
107 | 108 |
109 | 110 | 111 |
112 | 113 |
114 | 115 | 116 |
117 | 118 |
119 | 120 | 121 |
122 | 123 |
124 | 125 | 126 |
127 | 128 |
129 | 130 | 131 |
132 | 133 |
134 | 135 | 136 |
137 | 138 |
139 | 140 | 141 |
142 | 143 |
144 | 145 | 146 |
147 |
148 | 149 | 150 |
151 | 152 |
Drag and drop files here
153 |
    154 | 155 |
    156 | 157 |
    158 | 159 |
    Drag and drop a .txt file here
    160 |

    161 | 162 |
    163 | 164 | 165 | 166 | 181 |
    182 |
    183 | 184 | 185 | 186 | 187 | 207 | 208 | 209 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /functions/tts/gui/player.py: -------------------------------------------------------------------------------- 1 | import tkinter as tk 2 | from tkinter import ttk, messagebox 3 | import threading 4 | import time 5 | import os 6 | import soundfile as sf 7 | import pygame 8 | 9 | # Constants (copied from orpheus_tts.py, consider centralizing if used elsewhere) 10 | # SCRIPT_DIR = os.path.dirname(__file__) # This would be functions/tts/gui 11 | # IMAGE_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", "settings/images")) 12 | # MUSIC_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", "settings/music")) 13 | # VOICE_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, "..", "..", "..", "settings/voices")) 14 | 15 | # Initialize pygame mixer (should ideally be done once at application start) 16 | try: 17 | pygame.mixer.init() 18 | if not pygame.mixer.get_init(): 19 | print("Warning: Pygame mixer failed to initialize.") 20 | pygame = None # Treat as if pygame is not available 21 | except ImportError: 22 | print("Warning: 'pygame' library not found. 
pip install pygame") 23 | pygame = None 24 | 25 | class AudioPlayer(ttk.Frame): 26 | def __init__(self, parent, redo_command=None, waveform_ax=None, waveform_canvas_agg=None): 27 | super().__init__(parent) 28 | self.redo_command = redo_command 29 | self.waveform_ax = waveform_ax 30 | self.waveform_canvas_agg = waveform_canvas_agg 31 | self.progress_line = None 32 | 33 | self.current_file = None 34 | self.is_playing = False 35 | self.current_pos = 0 36 | 37 | self.controls_frame = ttk.Frame(self) 38 | self.controls_frame.pack(side=tk.TOP, fill=tk.X, padx=5, pady=(5, 2)) 39 | 40 | self.play_btn = ttk.Button(self.controls_frame, text="Play", width=5, command=self.toggle_play, state=tk.DISABLED) 41 | self.play_btn.pack(side=tk.LEFT, padx=2) 42 | 43 | self.stop_btn = ttk.Button(self.controls_frame, text="Stop", width=5, command=self.stop, state=tk.DISABLED) 44 | self.stop_btn.pack(side=tk.LEFT, padx=2) 45 | 46 | self.redo_btn = ttk.Button(self.controls_frame, text="Redo", width=5, command=self.redo_command, state=tk.DISABLED) 47 | self.redo_btn.pack(side=tk.LEFT, padx=2) 48 | 49 | self.progress_frame = ttk.Frame(self) 50 | self.progress_frame.pack(side=tk.TOP, fill=tk.X, padx=5, pady=(0, 5)) 51 | 52 | self.time_var = tk.StringVar(value="00:00 / 00:00") 53 | self.time_label = ttk.Label(self.progress_frame, textvariable=self.time_var) 54 | self.time_label.pack(side=tk.RIGHT, padx=5) 55 | 56 | self.update_thread = None 57 | 58 | def load_file(self, filepath): 59 | self.stop() 60 | self.current_file = None 61 | self.time_var.set("00:00 / 00:00") 62 | self.play_btn.configure(state=tk.DISABLED) 63 | self.stop_btn.configure(state=tk.DISABLED) 64 | self._update_progress_line(0) 65 | 66 | if not pygame: 67 | print("AudioPlayer: Pygame not available.") 68 | return False 69 | 70 | if not filepath: 71 | print("AudioPlayer: No file path provided.") 72 | return False 73 | 74 | print(f"AudioPlayer: Attempting to load audio file: {filepath}") 75 | if not os.path.exists(filepath): 76 | print(f"AudioPlayer: Error - File does not exist: {filepath}") 77 | return False 78 | 79 | try: 80 | info = sf.info(filepath) 81 | duration = info.frames / info.samplerate 82 | print(f"AudioPlayer: Duration calculated via soundfile: {duration:.2f}s") 83 | 84 | pygame.mixer.music.load(filepath) 85 | print(f"AudioPlayer: Pygame loaded: {filepath}") 86 | 87 | self.current_file = filepath 88 | self.duration = duration 89 | self.update_time_label(0, duration) 90 | self.play_btn.configure(state=tk.NORMAL) 91 | self.stop_btn.configure(state=tk.NORMAL) 92 | self._update_progress_line(0) 93 | return True 94 | 95 | except Exception as e: 96 | print(f"AudioPlayer: Error loading audio file {filepath}: {e}") 97 | self.current_file = None 98 | return False 99 | 100 | def toggle_play(self): 101 | if not self.current_file or not pygame or not pygame.mixer.get_init(): 102 | return 103 | 104 | if self.is_playing: 105 | try: 106 | pygame.mixer.music.pause() 107 | self.play_btn.configure(text="Play") 108 | self.is_playing = False 109 | except Exception as e: 110 | print(f"AudioPlayer: Error pausing music: {e}") 111 | else: 112 | try: 113 | if not pygame.mixer.music.get_busy(): 114 | print("AudioPlayer: Music not busy, reloading and playing from start/seek pos.") 115 | pygame.mixer.music.load(self.current_file) 116 | pygame.mixer.music.play(start=self.current_pos) 117 | else: 118 | pygame.mixer.music.unpause() 119 | 120 | self.play_btn.configure(text="Pause") 121 | self.is_playing = True 122 | 123 | if not self.update_thread or not 
self.update_thread.is_alive(): 124 | self.update_thread = threading.Thread(target=self.update_progress, daemon=True) 125 | self.update_thread.start() 126 | except Exception as e: 127 | messagebox.showerror("Playback Error", f"Error playing audio: {str(e)}") 128 | print(f"AudioPlayer: Error playing/unpausing music: {e}") 129 | self.is_playing = False 130 | self.play_btn.configure(text="Play") 131 | return 132 | 133 | def stop(self): 134 | if pygame and pygame.mixer.get_init(): 135 | try: 136 | pygame.mixer.music.stop() 137 | pygame.mixer.music.unload() 138 | except Exception as e: 139 | print(f"AudioPlayer: Error stopping/unloading music: {e}") 140 | 141 | self.is_playing = False 142 | self.current_pos = 0 143 | self.play_btn.configure(text="Play") 144 | if self.current_file: 145 | self.play_btn.configure(state=tk.NORMAL) 146 | else: 147 | self.play_btn.configure(state=tk.DISABLED) 148 | self.update_time_label(0, getattr(self, 'duration', 0)) 149 | self._update_progress_line(0) 150 | if self.current_file: 151 | self.stop_btn.configure(state=tk.NORMAL) 152 | else: 153 | self.stop_btn.configure(state=tk.DISABLED) 154 | 155 | def seek_to_time(self, target_time): 156 | """Seeks playback to the specified time (in seconds).""" 157 | if not self.current_file or not pygame or not pygame.mixer.get_init(): 158 | return 159 | if target_time < 0 or target_time > self.duration: 160 | print(f"AudioPlayer: Invalid seek time: {target_time:.2f}s") 161 | return 162 | 163 | print(f"AudioPlayer: Seek requested to {target_time:.2f}s") 164 | was_playing = self.is_playing 165 | self.is_playing = False 166 | 167 | try: 168 | pygame.mixer.music.stop() 169 | pygame.mixer.music.load(self.current_file) 170 | pygame.mixer.music.play(start=target_time) 171 | self.current_pos = target_time 172 | self.is_playing = True 173 | self.play_btn.configure(text="Pause") 174 | self._update_progress_line(target_time) 175 | 176 | if not self.update_thread or not self.update_thread.is_alive(): 177 | print("AudioPlayer: Restarting update thread after seek.") 178 | self.update_thread = threading.Thread(target=self.update_progress, daemon=True) 179 | self.update_thread.start() 180 | 181 | except Exception as e: 182 | print(f"AudioPlayer: Error seeking and playing: {e}") 183 | messagebox.showerror("Seek Error", f"Error seeking audio: {e}") 184 | self.stop() 185 | 186 | def update_progress(self): 187 | print("AudioPlayer: Starting update_progress loop.") 188 | while pygame and pygame.mixer.get_init() and self.current_file: 189 | if not self.is_playing: 190 | print("AudioPlayer: is_playing is False, breaking loop.") 191 | break 192 | try: 193 | current_playback_time = pygame.mixer.music.get_pos() / 1000.0 194 | display_pos = self.current_pos + current_playback_time 195 | 196 | if current_playback_time < 0: 197 | if not pygame.mixer.music.get_busy(): 198 | print("AudioPlayer: Playback finished (get_busy is False).") 199 | self.is_playing = False 200 | self.play_btn.configure(text="Play") 201 | self.current_pos = 0 202 | print("AudioPlayer: Exiting loop after playback finished.") 203 | break 204 | else: 205 | time.sleep(0.1) 206 | continue 207 | 208 | if display_pos >= self.duration: 209 | display_pos = self.duration 210 | if not pygame.mixer.music.get_busy(): 211 | self.is_playing = False 212 | self.play_btn.configure(text="Play") 213 | self.current_pos = 0 214 | self._update_progress_line(0) 215 | self.update_time_label(0, self.duration) 216 | print("AudioPlayer: Exiting loop after playback finished naturally.") 217 | break 218 | 219 | 
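# Push the freshly computed playback position (current_pos offset plus
# pygame.mixer.music.get_pos()/1000) to the waveform cursor and the time label.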
self._update_progress_line(display_pos) 220 | self.update_time_label(display_pos, self.duration) 221 | 222 | except Exception as e: 223 | if isinstance(e, pygame.error) and "mixer not initialized" in str(e): 224 | print("AudioPlayer: Mixer became uninitialized during update.") 225 | self.is_playing = False 226 | break 227 | print(f"AudioPlayer: Error in update_progress: {type(e).__name__} - {e}") 228 | self.is_playing = False 229 | break 230 | 231 | time.sleep(0.1) 232 | print("AudioPlayer: Exited update_progress loop.") 233 | 234 | def update_time_label(self, current, total): 235 | try: 236 | total = max(0, total) 237 | current = max(0, min(current, total)) 238 | current_str = time.strftime("%M:%S", time.gmtime(current)) 239 | total_str = time.strftime("%M:%S", time.gmtime(total)) 240 | self.time_var.set(f"{current_str} / {total_str}") 241 | except ValueError as e: 242 | print(f"AudioPlayer: ValueError updating time label: {e}. Current: {current}, Total: {total}") 243 | self.time_var.set("--:-- / --:--") 244 | 245 | def _update_progress_line(self, time_pos): 246 | """Updates the vertical progress line on the waveform plot.""" 247 | if self.waveform_ax and self.waveform_canvas_agg: 248 | try: 249 | xlim = self.waveform_ax.get_xlim() 250 | time_pos = max(xlim[0], min(time_pos, xlim[1])) 251 | except Exception: 252 | pass 253 | 254 | if self.progress_line is None: 255 | if self.waveform_ax.lines: 256 | self.progress_line = self.waveform_ax.axvline(time_pos, color='r', linestyle='--', linewidth=1, label='_nolegend_') 257 | else: 258 | self.progress_line = None 259 | return 260 | elif self.progress_line in self.waveform_ax.lines: 261 | self.progress_line.set_xdata([time_pos, time_pos]) 262 | else: 263 | self.progress_line = self.waveform_ax.axvline(time_pos, color='r', linestyle='--', linewidth=1, label='_nolegend_') 264 | 265 | try: 266 | self.waveform_canvas_agg.draw_idle() 267 | except Exception as e: 268 | print(f"AudioPlayer: Error drawing progress line: {e}") 269 | 270 | def cleanup(self): 271 | """Stop playback and cleanup resources""" 272 | print("AudioPlayer: Cleanup called.") 273 | self.is_playing = False 274 | self.stop() -------------------------------------------------------------------------------- /static/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | line-height: 1.6; 4 | margin: 0; 5 | padding: 20px; 6 | background-color: #f4f4f4; 7 | color: #333; 8 | } 9 | 10 | .container { 11 | max-width: 960px; 12 | margin: 0 auto; 13 | background: #fff; 14 | padding: 25px 40px; 15 | border-radius: 10px; 16 | box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); 17 | } 18 | 19 | h1, h2 { 20 | color: #0056b3; 21 | border-bottom: 2px solid #eee; 22 | padding-bottom: 10px; 23 | margin-bottom: 20px; 24 | } 25 | 26 | .form-group { 27 | margin-bottom: 20px; 28 | } 29 | 30 | .form-group label { 31 | display: block; 32 | margin-bottom: 5px; 33 | font-weight: bold; 34 | } 35 | 36 | .form-group input[type="text"], 37 | .form-group input[type="number"], 38 | .form-group input[type="date"], 39 | .form-group select, 40 | .form-group textarea { 41 | width: 100%; 42 | padding: 10px; /* Increased padding */ 43 | border: 1px solid #ccc; 44 | border-radius: 5px; /* Slightly more rounded corners */ 45 | box-sizing: border-box; 46 | transition: border-color 0.3s, box-shadow 0.3s; /* Added transition */ 47 | } 48 | 49 | .form-group input[type="text"]:focus, 50 | .form-group input[type="number"]:focus, 51 | .form-group 
input[type="date"]:focus, 52 | .form-group select:focus, 53 | .form-group textarea:focus { 54 | border-color: #007bff; 55 | box-shadow: 0 0 5px rgba(0, 123, 255, 0.5); 56 | outline: none; 57 | } 58 | 59 | .checkbox-group { 60 | display: flex; 61 | align-items: center; 62 | margin-bottom: 10px; 63 | } 64 | 65 | .checkbox-group input[type="checkbox"] { 66 | margin-right: 10px; 67 | } 68 | 69 | /* Styles for the main dashboard menu grid */ 70 | .menu-grid { 71 | display: grid; 72 | grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); 73 | gap: 20px; 74 | margin-top: 30px; 75 | } 76 | 77 | .menu-item { 78 | background-color: #e9f5ff; 79 | border: 1px solid #b3e0ff; 80 | border-radius: 8px; 81 | padding: 20px; 82 | text-align: center; 83 | text-decoration: none; 84 | color: #0056b3; 85 | transition: background-color 0.3s ease, transform 0.3s ease; 86 | box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); 87 | } 88 | 89 | .menu-item:hover { 90 | background-color: #cce7ff; 91 | transform: translateY(-5px); 92 | } 93 | 94 | .menu-item h2 { 95 | margin-top: 0; 96 | margin-bottom: 10px; 97 | color: #0056b3; 98 | border-bottom: none; 99 | padding-bottom: 0; 100 | } 101 | 102 | .menu-item p { 103 | font-size: 0.9em; 104 | color: #333; 105 | } 106 | 107 | /* Style for the collapsible button (from podcast_gui.css) */ 108 | .collapsible { 109 | background-color: #f1f1f1; 110 | color: #444; 111 | cursor: pointer; 112 | padding: 18px; 113 | width: 100%; 114 | border: none; 115 | text-align: left; 116 | outline: none; 117 | font-size: 15px; 118 | transition: 0.4s; 119 | margin-top: 20px; 120 | border-radius: 4px; 121 | } 122 | 123 | .active, .collapsible:hover { 124 | background-color: #ccc; 125 | } 126 | 127 | /* Style for the collapsible content (from podcast_gui.css) */ 128 | .content { 129 | padding: 0 18px; 130 | background-color: white; 131 | max-height: 0; 132 | overflow: hidden; 133 | transition: max-height 0.2s ease-out; 134 | border: 1px solid #eee; 135 | border-top: none; 136 | border-radius: 0 0 4px 4px; 137 | margin-bottom: 20px; 138 | } 139 | 140 | .content h3 { 141 | color: #0056b3; 142 | border-bottom: 1px solid #eee; 143 | padding-bottom: 5px; 144 | margin-top: 20px; 145 | margin-bottom: 15px; 146 | } 147 | 148 | /* Drop area styles (from style.css) */ 149 | .drop-area { 150 | border: 2px dashed #ccc; 151 | border-radius: 4px; 152 | padding: 20px; 153 | text-align: center; 154 | cursor: pointer; 155 | margin-top: 10px; 156 | background-color: #f9f9f9; 157 | } 158 | 159 | .drop-area.highlight { 160 | border-color: #007bff; 161 | background-color: #e9e9e9; 162 | } 163 | 164 | #reference-docs-list { 165 | list-style: none; 166 | padding: 0; 167 | margin-top: 10px; 168 | } 169 | 170 | #reference-docs-list li { 171 | background-color: #e9e9e9; 172 | padding: 8px; 173 | margin-bottom: 5px; 174 | border-radius: 4px; 175 | display: flex; 176 | justify-content: space-between; 177 | align-items: center; 178 | } 179 | 180 | #reference-docs-list li .remove-file { 181 | color: #dc3545; 182 | cursor: pointer; 183 | font-weight: bold; 184 | } 185 | 186 | button, .button { 187 | background-color: #007bff; 188 | color: white; 189 | padding: 12px 25px; 190 | border: none; 191 | border-radius: 5px; 192 | cursor: pointer; 193 | font-size: 16px; 194 | margin-top: 10px; 195 | transition: background-color 0.3s, transform 0.2s; 196 | text-transform: uppercase; 197 | letter-spacing: 1px; 198 | font-weight: bold; 199 | } 200 | 201 | button:hover, .button:hover { 202 | background-color: #0056b3; 203 | transform: 
204 | }
205 |
206 | details {
207 |     border: 1px solid #eee;
208 |     border-radius: 5px;
209 |     margin-bottom: 20px;
210 |     background-color: #f9f9f9;
211 | }
212 |
213 | summary {
214 |     padding: 15px;
215 |     font-weight: bold;
216 |     cursor: pointer;
217 |     outline: none;
218 |     color: #0056b3;
219 | }
220 |
221 | details[open] {
222 |     background-color: #fff;
223 | }
224 |
225 | details[open] summary {
226 |     border-bottom: 1px solid #eee;
227 | }
228 |
229 | details .form-group {
230 |     padding: 0 15px;
231 | }
232 |
233 | details .form-group:first-of-type {
234 |     padding-top: 15px;
235 | }
236 |
237 | details .form-group:last-of-type {
238 |     padding-bottom: 15px;
239 |     margin-bottom: 0;
240 | }
241 |
242 | /* Output console styles */
243 | #output pre,
244 | #script-console-output,
245 | #podcast-console-output {
246 |     background-color: #333;
247 |     color: #f4f4f4;
248 |     padding: 15px;
249 |     border-radius: 4px;
250 |     overflow-x: auto;
251 |     white-space: pre-wrap;
252 |     word-wrap: break-word;
253 |     max-height: 300px; /* Fixed height */
254 |     overflow-y: auto; /* Scrollable */
255 |     text-align: left;
256 |     margin-top: 20px;
257 | }
258 |
259 | #results {
260 |     margin-top: 20px;
261 |     padding: 15px;
262 |     background-color: #d4edda;
263 |     color: #155724;
264 |     border: 1px solid #c3e6cb;
265 |     border-radius: 4px;
266 | }
267 |
268 | #output-links a, #report-links a { /* Combined from both CSS files */
269 |     display: block; /* Changed from inline-block for better history display */
270 |     margin-top: 10px;
271 |     color: #004085;
272 |     text-decoration: none;
273 |     font-weight: bold;
274 | }
275 |
276 | #output-links a:hover, #report-links a:hover {
277 |     text-decoration: underline;
278 | }
279 |
280 | nav a {
281 |     margin-right: 15px;
282 |     text-decoration: none;
283 |     color: #007bff;
284 | }
285 |
286 | nav a:hover {
287 |     text-decoration: underline;
288 | }
289 |
290 | .settings-section {
291 |     margin-bottom: 30px;
292 |     padding-bottom: 20px;
293 |     border-bottom: 1px dashed #ccc;
294 | }
295 |
296 | .settings-section:last-child {
297 |     border-bottom: none;
298 | }
299 |
300 | #llm-model-details {
301 |     border: 1px solid #eee;
302 |     padding: 15px;
303 |     margin-top: 15px;
304 |     border-radius: 4px;
305 |     background-color: #f9f9f9;
306 | }
307 |
308 | #llm-model-details .form-group input[readonly] {
309 |     background-color: #e9e9e9;
310 | }
311 |
312 | #delete-llm-model {
313 |     background-color: #dc3545;
314 | }
315 |
316 | #delete-llm-model:hover {
317 |     background-color: #c82333;
318 | }
319 |
320 | /* Modal Overlay (from podcast_gui.css) */
321 | .modal-overlay {
322 |     position: fixed;
323 |     top: 0;
324 |     left: 0;
325 |     width: 100%;
326 |     height: 100%;
327 |     background-color: rgba(0, 0, 0, 0.7);
328 |     display: flex;
329 |     justify-content: center;
330 |     align-items: center;
331 |     z-index: 1000;
332 | }
333 |
334 | .modal-content {
335 |     background-color: #fff;
336 |     padding: 30px;
337 |     border-radius: 8px;
338 |     box-shadow: 0 0 20px rgba(0, 0, 0, 0.3);
339 |     text-align: center;
340 |     max-width: 500px;
341 |     width: 90%;
342 | }
343 |
344 | .modal-content h2 {
345 |     color: #0056b3;
346 |     margin-top: 0;
347 | }
348 |
349 | .modal-content p {
350 |     font-size: 1.1em;
351 |     margin-bottom: 20px;
352 | }
353 |
354 | /* Spinner animation (combined and adjusted for size) */
355 | .spinner {
356 |     border: 4px solid rgba(0, 123, 255, 0.3);
357 |     border-radius: 50%;
358 |     border-top: 4px solid #007bff;
359 |     width: 40px; /* Larger spinner from podcast_gui.css */
360 |     height: 40px; /* Larger spinner from podcast_gui.css */
361 |     animation: spin 1s linear infinite;
362 |     margin: 20px auto; /* Center the spinner from podcast_gui.css */
363 |     display: block; /* Ensure it takes its own line */
364 | }
365 |
366 | @keyframes spin {
367 |     0% { transform: rotate(0deg); }
368 |     100% { transform: rotate(360deg); }
369 | }
370 |
371 | /* Styles for history page output list */
372 | .output-list {
373 |     margin-top: 20px;
374 | }
375 |
376 | .output-item {
377 |     background-color: #f9f9f9;
378 |     border: 1px solid #eee;
379 |     border-radius: 8px;
380 |     padding: 15px;
381 |     margin-bottom: 15px;
382 |     box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
383 | }
384 |
385 | .output-item h3 {
386 |     margin-top: 0;
387 |     margin-bottom: 10px;
388 |     color: #0056b3;
389 |     border-bottom: 1px solid #eee;
390 |     padding-bottom: 5px;
391 | }
392 |
393 | .output-item p {
394 |     margin-bottom: 5px;
395 |     font-size: 0.95em;
396 | }
397 |
398 | .output-item .button {
399 |     display: inline-block;
400 |     margin-right: 10px;
401 |     margin-top: 10px;
402 |     padding: 8px 15px;
403 |     background-color: #007bff;
404 |     color: white;
405 |     text-decoration: none;
406 |     border-radius: 4px;
407 |     font-size: 0.9em;
408 | }
409 |
410 | .output-item .button:hover {
411 |     background-color: #0056b3;
412 | }
413 |
414 | .back-button {
415 |     display: inline-block;
416 |     margin-bottom: 20px;
417 |     padding: 10px 15px;
418 |     background-color: #6c757d;
419 |     color: white;
420 |     text-decoration: none;
421 |     border-radius: 4px;
422 |     font-size: 0.9em;
423 | }
424 |
425 | .back-button:hover {
426 |     background-color: #5a6268;
427 | }
428 |
429 | /* Docker Status Widget Styles */
430 | .docker-status-widget {
431 |     background-color: #f8f9fa;
432 |     border: 1px solid #dee2e6;
433 |     border-radius: 8px;
434 |     padding: 20px;
435 |     margin-bottom: 30px;
436 |     box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
437 | }
438 |
439 | .docker-status-widget h3 {
440 |     margin-top: 0;
441 |     margin-bottom: 15px;
442 |     color: #495057;
443 |     border-bottom: 1px solid #dee2e6;
444 |     padding-bottom: 10px;
445 | }
446 |
447 | .status-indicator {
448 |     display: flex;
449 |     align-items: center;
450 |     margin-bottom: 15px;
451 | }
452 |
453 | .status-dot {
454 |     width: 12px;
455 |     height: 12px;
456 |     border-radius: 50%;
457 |     margin-right: 10px;
458 |     display: inline-block;
459 | }
460 |
461 | .status-dot.status-running {
462 |     background-color: #28a745;
463 |     box-shadow: 0 0 5px rgba(40, 167, 69, 0.5);
464 | }
465 |
466 | .status-dot.status-stopped {
467 |     background-color: #ffc107;
468 | }
469 |
470 | .status-dot.status-building {
471 |     background-color: #17a2b8;
472 |     animation: pulse 2s infinite;
473 | }
474 |
475 | .status-dot.status-error {
476 |     background-color: #dc3545;
477 | }
478 |
479 | @keyframes pulse {
480 |     0% { opacity: 1; }
481 |     50% { opacity: 0.5; }
482 |     100% { opacity: 1; }
483 | }
484 |
485 | .status-text {
486 |     font-weight: bold;
487 |     color: #495057;
488 | }
489 |
490 | .docker-controls {
491 |     margin-bottom: 15px;
492 | }
493 |
494 | .docker-btn {
495 |     display: inline-block;
496 |     padding: 8px 16px;
497 |     margin-right: 10px;
498 |     border: none;
499 |     border-radius: 4px;
500 |     text-decoration: none;
501 |     font-size: 14px;
502 |     cursor: pointer;
503 |     transition: background-color 0.3s ease;
504 | }
505 |
506 | .docker-btn.start-btn {
507 |     background-color: #28a745;
508 |     color: white;
509 | }
510 |
511 | .docker-btn.start-btn:hover {
512 |     background-color: #218838;
513 | }
514 |
515 | .docker-btn.stop-btn {
516 |     background-color: #dc3545;
517 |     color: white;
518 | }
519 |
520 | .docker-btn.stop-btn:hover {
521 |     background-color: #c82333;
522 | }
523 |
524 | .docker-btn.ui-btn {
525 |     background-color: #007bff;
526 |     color: white;
527 | }
528 |
529 | .docker-btn.ui-btn:hover {
530 |     background-color: #0056b3;
531 | }
532 |
533 | .docker-btn:disabled {
534 |     opacity: 0.6;
535 |     cursor: not-allowed;
536 | }
537 |
538 | .docker-message {
539 |     font-size: 14px;
540 |     color: #6c757d;
541 |     font-style: italic;
542 | }
543 |
544 | /* Easy Mode Button */
545 | .easy-mode-button {
546 |     background-color: #28a745; /* Green color */
547 |     color: white;
548 |     padding: 12px 20px;
549 |     border: none;
550 |     border-radius: 5px;
551 |     cursor: pointer;
552 |     font-size: 18px;
553 |     width: 100%;
554 |     box-sizing: border-box;
555 |     margin-bottom: 20px; /* Space below the button */
556 |     display: block; /* Ensure it takes full width */
557 | }
558 |
559 | .easy-mode-button:hover {
560 |     background-color: #218838; /* Darker green on hover */
561 | }
562 |
563 | /* Modal Styles */
564 | .modal {
565 |     display: none; /* Hidden by default */
566 |     position: fixed; /* Stay in place */
567 |     z-index: 1000; /* Sit on top */
568 |     left: 0;
569 |     top: 0;
570 |     width: 100%; /* Full width */
571 |     height: 100%; /* Full height */
572 |     overflow: auto; /* Enable scroll if needed */
573 |     background-color: rgba(0,0,0,0.4); /* Black w/ opacity */
574 |     padding-top: 60px;
575 | }
576 |
577 | .modal-content {
578 |     background-color: #fefefe;
579 |     margin: 5% auto; /* 5% from the top and centered */
580 |     padding: 20px;
581 |     border: 1px solid #888;
582 |     border-radius: 8px;
583 |     width: 80%; /* Could be more or less, depending on screen size */
584 |     max-width: 600px;
585 |     position: relative;
586 |     box-shadow: 0 4px 8px rgba(0,0,0,0.2);
587 |     animation-name: animatetop;
588 |     animation-duration: 0.4s;
589 | }
590 |
591 | /* Add Animation */
592 | @keyframes animatetop {
593 |     from {top: -300px; opacity: 0}
594 |     to {top: 0; opacity: 1}
595 | }
596 |
597 | .close-button {
598 |     color: #aaa;
599 |     float: right;
600 |     font-size: 28px;
601 |     font-weight: bold;
602 | }
603 |
604 | .close-button:hover,
605 | .close-button:focus {
606 |     color: black;
607 |     text-decoration: none;
608 |     cursor: pointer;
609 | }
610 |
611 | #research-description {
612 |     width: 100%;
613 |     padding: 12px;
614 |     border: 1px solid #ccc;
615 |     border-radius: 5px;
616 |     box-sizing: border-box;
617 |     font-size: 16px;
618 |     line-height: 1.5;
619 |     margin-bottom: 15px;
620 |     resize: vertical;
621 |     box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
622 |     transition: border-color 0.3s, box-shadow 0.3s;
623 | }
624 |
625 | #research-description:focus {
626 |     border-color: #007bff;
627 |     box-shadow: 0 0 5px rgba(0, 123, 255, 0.5);
628 |     outline: none;
629 | }
630 |
631 | /* Resume Section Styles */
632 | .resume-section {
633 |     margin-top: 30px;
634 |     padding-top: 20px;
635 |     border-top: 2px solid #eee;
636 | }
637 |
638 | .resume-section h2 {
639 |     font-size: 1.2em;
640 |     color: #333;
641 |     border-bottom: none;
642 |     padding-bottom: 0;
643 |     margin-bottom: 15px;
644 | }
645 |
646 | #resume-container {
647 |     display: flex;
648 |     align-items: center;
649 |     gap: 10px;
650 | }
651 |
652 | #resume-container select {
653 |     flex-grow: 1; /* Allows the select to take up available space */
654 | }
655 |
656 | #load-podcast-button {
657 |     background-color: #28a745; /* Green color for load button */
658 |     flex-shrink: 0; /* Prevents the button from shrinking */
659 | }
660 |
661 | #load-podcast-button:hover {
662 |     background-color: #218838;
663 | }
664 |
--------------------------------------------------------------------------------