├── run.bat ├── requirements.txt ├── api_config.json ├── abrir_cmd.bat ├── i18n ├── i18n.py ├── locale_diff.py ├── scan_i18n.py └── locale │ ├── en_US.json │ └── pt_BR.json ├── prompt.txt ├── scripts ├── save_json.py ├── transcribe_cuts.py ├── cut_json.py ├── download_video.py ├── organize_output.py ├── burn_subtitles.py ├── one_face.py ├── face_detection_insightface.py ├── two_face.py ├── transcribe_video.py ├── cut_segments.py ├── adjust_subtitles.py ├── create_viral_segments.py └── edit_video.py ├── changelog.md ├── README_en.md ├── README.md ├── main_improved.py ├── ViralCutter.ipynb └── LICENSE /run.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | setlocal 3 | title ViralCutter 4 | 5 | env\python.exe main_improved.py 6 | echo. 7 | pause -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | g4f[all] 2 | yt-dlp 3 | ffmpeg-python 4 | whisperx 5 | mediapipe 6 | google-genai 7 | insightface 8 | onnxruntime-gpu -------------------------------------------------------------------------------- /api_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "selected_api": "gemini", 3 | "gemini": { 4 | "api_key": "", 5 | "model": "gemini-2.5-flash-lite-preview-09-2025", 6 | "chunk_size": 20000 7 | }, 8 | "g4f": { 9 | "model": "gpt-4o-mini", 10 | "chunk_size": 2000 11 | } 12 | } -------------------------------------------------------------------------------- /abrir_cmd.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | REM Vai para a pasta onde está o .bat 4 | cd /d "%~dp0" 5 | 6 | REM Inicializa o conda 7 | call "%USERPROFILE%\miniconda3\Scripts\activate.bat" 8 | 9 | REM Ativa o ambiente local 10 | call conda activate ./env 11 | 12 | REM Abre um CMD interativo e mantém aberto 13 | cmd /k 14 | -------------------------------------------------------------------------------- /i18n/i18n.py: -------------------------------------------------------------------------------- 1 | import json 2 | import locale 3 | import os 4 | 5 | 6 | def load_language_list(language): 7 | with open(f"./i18n/locale/{language}.json", "r", encoding="utf-8") as f: 8 | language_list = json.load(f) 9 | return language_list 10 | 11 | 12 | class I18nAuto: 13 | def __init__(self, language=None): 14 | if language in ["Auto", None]: 15 | language = locale.getdefaultlocale()[ 16 | 0 17 | ] # getlocale can't identify the system's language ((None, None)) 18 | if not os.path.exists(f"./i18n/locale/{language}.json"): 19 | language = "en_US" 20 | self.language = language 21 | self.language_map = load_language_list(language) 22 | 23 | def __call__(self, key): 24 | return self.language_map.get(key, key) 25 | 26 | def __repr__(self): 27 | return "Use Language: " + self.language 28 | -------------------------------------------------------------------------------- /i18n/locale_diff.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from collections import OrderedDict 4 | 5 | # Define the standard file name 6 | standard_file = "locale/zh_CN.json" 7 | 8 | # Find all JSON files in the directory 9 | dir_path = "locale/" 10 | languages = [ 11 | os.path.join(dir_path, f) 12 | for f in os.listdir(dir_path) 13 | if f.endswith(".json") and f != standard_file 14 | ] 15 | 16 | # Load the standard file 17 
| with open(standard_file, "r", encoding="utf-8") as f: 18 | standard_data = json.load(f, object_pairs_hook=OrderedDict) 19 | 20 | # Loop through each language file 21 | for lang_file in languages: 22 | # Load the language file 23 | with open(lang_file, "r", encoding="utf-8") as f: 24 | lang_data = json.load(f, object_pairs_hook=OrderedDict) 25 | 26 | # Find the difference between the language file and the standard file 27 | diff = set(standard_data.keys()) - set(lang_data.keys()) 28 | 29 | miss = set(lang_data.keys()) - set(standard_data.keys()) 30 | 31 | # Add any missing keys to the language file 32 | for key in diff: 33 | lang_data[key] = key 34 | 35 | # Del any extra keys to the language file 36 | for key in miss: 37 | del lang_data[key] 38 | 39 | # Sort the keys of the language file to match the order of the standard file 40 | lang_data = OrderedDict( 41 | sorted(lang_data.items(), key=lambda x: list(standard_data.keys()).index(x[0])) 42 | ) 43 | 44 | # Save the updated language file 45 | with open(lang_file, "w", encoding="utf-8") as f: 46 | json.dump(lang_data, f, ensure_ascii=False, indent=4, sort_keys=True) 47 | f.write("\n") 48 | -------------------------------------------------------------------------------- /prompt.txt: -------------------------------------------------------------------------------- 1 | You are a Viral Segment Identifier, an AI system that analyzes a video's transcript and predicts which segments might go viral on social media platforms. 2 | 3 | You use factors such as emotional impact, humor, unexpected content, relevance to current trends, AND narrative completeness to make your predictions. 4 | 5 | A segment is considered VALID ONLY IF it contains: 6 | 1. A clear narrative arc: 7 | - Beginning: a strong hook that creates curiosity, tension, or emotional engagement within the first 2–3 seconds. 8 | - Middle: a coherent and self-contained development of the idea. 9 | - End: a clear resolution, punchline, conclusion, or emotionally satisfying closure. 10 | 2. Contextual completeness: 11 | - The segment must be fully understandable on its own, without requiring previous or subsequent parts of the video. 12 | 3. Non-generic content: 13 | - Avoid generic motivational phrases, vague advice, or overused expressions unless they are clearly reframed, subverted, or delivered in an unexpected way. 14 | 4. Viral triggers: 15 | - At least one must be present: 16 | - Emotional contrast or escalation 17 | - Humor or irony 18 | - Surprise, contradiction, or pattern break 19 | - A strong opinion or specific point of view 20 | - High relatability tied to a concrete situation 21 | 22 | {context_instruction} 23 | 24 | Given the following video transcript chunk, {virality_instruction}. 25 | 26 | CONSTRAINTS: 27 | - Each segment duration: {min_duration}s to {max_duration}s. 28 | - Cuts MUST MAKE SENSE contextually and respect natural speech boundaries (no mid-sentence cuts). 29 | - Segments MUST have a clear beginning, middle, and end. 30 | - Do NOT force the requested number of segments if the content does not justify it. 31 | - Reject any segment that feels incomplete, generic, or dependent on external context. 32 | - RETURN ONLY VALID JSON. No extra text. 
33 | 34 | TRANSCRIPT CHUNK: 35 | {transcript_chunk} 36 | 37 | OUTPUT FORMAT: 38 | {json_template} 39 | 40 | -------------------------------------------------------------------------------- /scripts/save_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | def save_viral_segments(segments_data=None, project_folder="tmp"): 5 | output_txt_file = os.path.join(project_folder, "viral_segments.txt") 6 | 7 | # Verifica se o arquivo já existe 8 | if not os.path.exists(output_txt_file): 9 | if segments_data is None: 10 | # Solicita ao usuário que insira o JSON caso o arquivo não exista e os segmentos não estejam definidos 11 | while True: 12 | user_input = input("\nPor favor, insira o JSON no formato desejado:\n") 13 | try: 14 | # Tenta carregar o JSON inserido 15 | segments_data = json.loads(user_input) 16 | 17 | # Valida se o formato está correto 18 | if "segments" in segments_data and isinstance(segments_data["segments"], list): 19 | # Salva os dados em um arquivo JSON 20 | with open(output_txt_file, 'w', encoding='utf-8') as file: 21 | json.dump(segments_data, file, ensure_ascii=False, indent=4) 22 | print(f"Segmentos virais salvos em {output_txt_file}") 23 | break 24 | else: 25 | print("Formato inválido. Certifique-se de que a estrutura está correta.") 26 | except json.JSONDecodeError: 27 | print("Erro ao decifrar o JSON. Por favor, verifique a formatação.") 28 | print("Por favor, tente novamente.") 29 | else: 30 | # Caso os segmentos tenham sido gerados, salva automaticamente 31 | with open(output_txt_file, 'w', encoding='utf-8') as file: 32 | json.dump(segments_data, file, ensure_ascii=False, indent=4) 33 | print(f"Segmentos virais salvos em {output_txt_file}\n") 34 | else: 35 | print(f"O arquivo {output_txt_file} já existe. 
Nenhuma entrada adicional é necessária.") -------------------------------------------------------------------------------- /i18n/scan_i18n.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import glob 3 | import json 4 | from collections import OrderedDict 5 | 6 | 7 | def extract_i18n_strings(node): 8 | i18n_strings = [] 9 | 10 | if ( 11 | isinstance(node, ast.Call) 12 | and isinstance(node.func, ast.Name) 13 | and node.func.id == "i18n" 14 | ): 15 | for arg in node.args: 16 | if isinstance(arg, ast.Str): 17 | i18n_strings.append(arg.s) 18 | 19 | for child_node in ast.iter_child_nodes(node): 20 | i18n_strings.extend(extract_i18n_strings(child_node)) 21 | 22 | return i18n_strings 23 | 24 | 25 | # scan the directory for all .py files (recursively) 26 | # for each file, parse the code into an AST 27 | # for each AST, extract the i18n strings 28 | 29 | strings = [] 30 | for filename in glob.iglob("**/*.py", recursive=True): 31 | with open(filename, "r") as f: 32 | code = f.read() 33 | if "I18nAuto" in code: 34 | tree = ast.parse(code) 35 | i18n_strings = extract_i18n_strings(tree) 36 | print(filename, len(i18n_strings)) 37 | strings.extend(i18n_strings) 38 | code_keys = set(strings) 39 | """ 40 | n_i18n.py 41 | gui_v1.py 26 42 | app.py 16 43 | infer-web.py 147 44 | scan_i18n.py 0 45 | i18n.py 0 46 | lib/train/process_ckpt.py 1 47 | """ 48 | print() 49 | print("Total unique:", len(code_keys)) 50 | 51 | 52 | standard_file = "i18n/locale/zh_CN.json" 53 | with open(standard_file, "r", encoding="utf-8") as f: 54 | standard_data = json.load(f, object_pairs_hook=OrderedDict) 55 | standard_keys = set(standard_data.keys()) 56 | 57 | # Define the standard file name 58 | unused_keys = standard_keys - code_keys 59 | print("Unused keys:", len(unused_keys)) 60 | for unused_key in unused_keys: 61 | print("\t", unused_key) 62 | 63 | missing_keys = code_keys - standard_keys 64 | print("Missing keys:", len(missing_keys)) 65 | for missing_key in missing_keys: 66 | print("\t", missing_key) 67 | 68 | code_keys_dict = OrderedDict() 69 | for s in strings: 70 | code_keys_dict[s] = s 71 | 72 | # write back 73 | with open(standard_file, "w", encoding="utf-8") as f: 74 | json.dump(code_keys_dict, f, ensure_ascii=False, indent=4, sort_keys=True) 75 | f.write("\n") 76 | -------------------------------------------------------------------------------- /scripts/transcribe_cuts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 6 | 7 | def transcribe(project_folder="tmp"): 8 | def generate_whisperx(input_file, output_folder, model='large-v3'): 9 | output_file = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_file))[0]}.srt") 10 | json_file = os.path.join(output_folder, f"{os.path.splitext(os.path.basename(input_file))[0]}.json") # Define the JSON output file 11 | 12 | # Skip processing if the JSON file already exists 13 | if os.path.exists(json_file): 14 | print(f"Arquivo já existe, pulando: {json_file}") 15 | return 16 | 17 | command = [ 18 | "whisperx", 19 | input_file, 20 | "--model", model, 21 | "--task", "transcribe", 22 | "--align_model", "WAV2VEC2_ASR_LARGE_LV60K_960H", 23 | "--chunk_size", "10", 24 | "--vad_onset", "0.4", 25 | "--vad_offset", "0.3", 26 | "--compute_type", "float32", 27 | "--batch_size", "10", 28 | "--output_dir", output_folder, 29 | "--output_format", 
"srt", 30 | "--output_format", "json", 31 | ] 32 | 33 | print(f"Transcrevendo: {input_file}...") 34 | result = subprocess.run(command, shell=True, text=True, capture_output=True) 35 | print(f"Comando executado: {command}") 36 | 37 | if result.returncode != 0: 38 | print("Erro durante a transcrição:") 39 | print(result.stderr) 40 | else: 41 | print(f"Transcrição concluída. Arquivo salvo em: {output_file} e {json_file}") 42 | # print(result.stdout) 43 | 44 | # Define o diretório de entrada e o diretório de saída 45 | input_folder = os.path.join(project_folder, 'final') 46 | output_folder = os.path.join(project_folder, 'subs') 47 | os.makedirs(output_folder, exist_ok=True) 48 | 49 | if not os.path.exists(input_folder): 50 | print(f"Pasta de entrada não encontrada: {input_folder}") 51 | return 52 | 53 | # Itera sobre todos os arquivos na pasta de entrada 54 | for filename in os.listdir(input_folder): 55 | if filename.endswith('.mp4'): # Filtra apenas arquivos .mp4 56 | input_file = os.path.join(input_folder, filename) 57 | generate_whisperx(input_file, output_folder) 58 | 59 | -------------------------------------------------------------------------------- /scripts/cut_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | def process_segments(data, start_time, end_time): 5 | new_segments = [] 6 | 7 | for segment in data.get('segments', []): 8 | seg_start = segment.get('start', 0) 9 | seg_end = segment.get('end', 0) 10 | 11 | # Verifica interseção 12 | if seg_end <= start_time or seg_start >= end_time: 13 | continue 14 | 15 | # Calcula overlap 16 | # Ajusta timestamps relativos ao corte 17 | new_seg_start = max(0, seg_start - start_time) 18 | new_seg_end = min(end_time, seg_end) - start_time 19 | 20 | # Filtra palavras se existirem 21 | new_words = [] 22 | if 'words' in segment: 23 | for word in segment['words']: 24 | w_start = word.get('start', 0) 25 | w_end = word.get('end', 0) 26 | 27 | if w_end > start_time and w_start < end_time: 28 | new_w_start = max(0, w_start - start_time) 29 | new_w_end = min(end_time, w_end) - start_time 30 | word_copy = word.copy() 31 | word_copy['start'] = new_w_start 32 | word_copy['end'] = new_w_end 33 | new_words.append(word_copy) 34 | 35 | # Se sobraram palavras ou se o segmento é válido no tempo 36 | if new_words or (new_seg_end > new_seg_start): 37 | new_segment = segment.copy() 38 | new_segment['start'] = new_seg_start 39 | new_segment['end'] = new_seg_end 40 | if 'words' in segment: 41 | new_segment['words'] = new_words 42 | new_segments.append(new_segment) 43 | 44 | return {'segments': new_segments} 45 | 46 | def cut_json_transcript(input_json_path, output_json_path, start_time, end_time): 47 | """ 48 | Lê o input.json (WhisperX), recorta o trecho e salva em output_json_path com timestamps ajustados. 49 | """ 50 | if not os.path.exists(input_json_path): 51 | print(f"Aviso: {input_json_path} não encontrado. 
Não foi possível gerar JSON do corte.") 52 | return 53 | 54 | try: 55 | with open(input_json_path, 'r', encoding='utf-8') as f: 56 | data = json.load(f) 57 | 58 | new_data = process_segments(data, start_time, end_time) 59 | 60 | with open(output_json_path, 'w', encoding='utf-8') as f: 61 | json.dump(new_data, f, indent=2, ensure_ascii=False) 62 | 63 | print(f"JSON de legenda gerado: {output_json_path}") 64 | 65 | except Exception as e: 66 | print(f"Erro ao cortar JSON: {e}") 67 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Fix 2 faces 4 | 5 | ### Melhorias na Detecção Facial e Layout 6 | - **Consistência Visual (2 Faces)**: Implementada lógica para "travar" a identidade dos rostos nas posições superior e inferior, impedindo que os participantes troquem de lugar durante o vídeo. 7 | - **Lógica de Fallback Inteligente**: Caso o rosto não seja detectado no frame atual, o sistema agora tenta recuperar a posição baseada no frame anterior, posterior ou na última coordenada válida conhecida. 8 | - **Intervalo de Detecção Personalizável**: Adicionada configuração para o usuário escolher a frequência da varredura facial, permitindo otimizar o tempo de renderização. 9 | 10 | ### Correções de Legendas 11 | - **Correção de Sobreposição**: Resolvido bug onde legendas apareciam sobrepostas em momentos de fala rápida. 12 | - **Refinamento de Centralização (2 Faces)**: Ajustes adicionais no cálculo de posição para garantir que a legenda fique perfeitamente centralizada no modo dividido. 13 | 14 | ## Atualizações Anteriores 15 | 16 | ### Refatoração e Melhorias de Código 17 | - **Refatoração do Script Principal**: Criação e aprimoramento do `main_improved.py` para melhorar a estrutura e manutenibilidade do pipeline de processamento. 18 | - **Padronização de Código (Inglês)**: Tradução completa de nomes de variáveis, funções e comentários internos para inglês, visando compatibilidade com padrões internacionais e colaboração open-source, mantendo logs de saída com suporte a i18n (`en_US`/`pt_BR`). 19 | - **Ajuste de Diretórios**: Reorganização da estrutura de pastas e caminhos de saída para maior organização dos arquivos gerados. 20 | 21 | ### Configuração e IA 22 | - **Integração Multi-LLM**: Implementação de suporte ao **g4f** (GPT-4 Free) e **Google Gemini**. 23 | - **API Config**: Centralização das chaves e seleção de modelos no novo arquivo `api_config.json`, permitindo troca rápida de provedor de IA sem alterar o código. 24 | - **Gerenciamento de Prompts**: Criação do arquivo `prompt.txt` para edição fácil do prompt do sistema. 25 | 26 | ### Legendas e Transcrição (Whisper) 27 | - **Correções no Whisper**: Solução robusta para erros de `unpickling`, conflitos de DLLs (`libprotobuf`, `torchaudio`) e detecção de GPU. 28 | - **Otimização do Fluxo (Slicing)**: O vídeo original é transcrito apenas uma vez. Os cortes reutilizam o JSON original, eliminando a re-transcrição e acelerando o processo. 29 | - **Posicionamento de Legendas**: Correção da lógica de alinhamento para centralização no modo "2-face". 30 | 31 | ### Processamento de Vídeo e Detecção Facial 32 | - **Novo Motor: InsightFace**: Adição da biblioteca `InsightFace` como motor de detecção facial de alta precisão. 33 | - **MediaPipe**: Manutenção e correção de erros no fallback para o MediaPipe. 34 | - **Limpeza de Logs**: Redução da verbosidade dos logs do FFmpeg no console. 
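The "Lógica de Fallback Inteligente" described above reduces to a short lookup chain: use the detection from the current frame if there is one, otherwise fall back to a neighbouring frame, and finally to the last known valid coordinate. A minimal sketch of the idea (names and data layout are illustrative only; the actual implementation in the editing pipeline is not reproduced in this listing):

```python
# Minimal sketch of the detection fallback described above (illustrative only).
from typing import Dict, Optional, Tuple

Box = Tuple[int, int, int, int]  # (x, y, width, height)

def resolve_face_box(frame_idx: int,
                     detections: Dict[int, Box],
                     last_valid: Optional[Box]) -> Optional[Box]:
    """Return a usable face box for frame_idx: the current frame first,
    then the previous/next frame, then the last known valid box."""
    if frame_idx in detections:
        return detections[frame_idx]
    for neighbor in (frame_idx - 1, frame_idx + 1):
        if neighbor in detections:
            return detections[neighbor]
    return last_valid
```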
-------------------------------------------------------------------------------- /scripts/download_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import yt_dlp 4 | 5 | def sanitize_filename(name): 6 | """Remove caracteres inválidos para nomes de arquivos/pastas.""" 7 | cleaned = re.sub(r'[\\/*?:"<>|]', "", name) 8 | cleaned = cleaned.strip() 9 | return cleaned 10 | 11 | def download(url, base_root="VIRALS"): 12 | # 1. Extrair informações do vídeo (sem baixar) para pegar o título 13 | print("Extraindo informações do vídeo...") 14 | with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True, 'cookiesfrombrowser': ('chrome',)}) as ydl: 15 | try: 16 | info = ydl.extract_info(url, download=False) 17 | title = info.get('title', 'Untitled_Video') 18 | safe_title = sanitize_filename(title) 19 | except Exception as e: 20 | print(f"Erro ao obter informações do vídeo: {e}") 21 | safe_title = "Unknown_Video" 22 | 23 | # 2. Criar estrutura de pastas 24 | project_folder = os.path.join(base_root, safe_title) 25 | os.makedirs(project_folder, exist_ok=True) 26 | 27 | # Caminho final do vídeo 28 | # O yt-dlp com 'outtmpl' e merge_output_format mp4 vai gerar .mp4 29 | # Mas precisamos garantir que seja exatamente 'input.mp4' para facilitar 30 | output_filename = 'input' # sem extensao pro ydl botar 31 | output_path_base = os.path.join(project_folder, output_filename) 32 | final_video_path = f"{output_path_base}.mp4" 33 | 34 | # Verificação inteligente: Se o arquivo já existe, reutiliza sem baixar de novo. 35 | if os.path.exists(final_video_path): 36 | # Validação simples de tamanho (evita arquivos vazios de falhas anteriores) 37 | if os.path.getsize(final_video_path) > 1024: # > 1KB 38 | print(f"Vídeo já existe em: {final_video_path}") 39 | print("Pulando download e reutilizando arquivo local.") 40 | return final_video_path, project_folder 41 | else: 42 | print("Arquivo existente encontrado mas parece corrompido/vazio. Baixando novamente...") 43 | try: 44 | os.remove(final_video_path) 45 | except: 46 | pass 47 | 48 | # Limpeza de temp 49 | temp_path = f"{output_path_base}.temp.mp4" 50 | if os.path.exists(temp_path): 51 | try: 52 | os.remove(temp_path) 53 | except: 54 | pass 55 | 56 | ydl_opts = { 57 | 'format': 'bestvideo+bestaudio/best', 58 | 'overwrites': True, 59 | # 'outtmpl' define o nome base. 60 | # 'merge_output_format' garante que se houver merge (video+audio), será mp4. 61 | # Removemos o FFmpegVideoConvertor explícito para evitar conflito de rename no Windows. 
62 | 'outtmpl': output_path_base, 63 | 'postprocessor_args': [ 64 | '-movflags', 'faststart' 65 | ], 66 | 'merge_output_format':'mp4' 67 | 68 | } 69 | 70 | print(f"Baixando vídeo para: {project_folder}...") 71 | while True: 72 | try: 73 | with yt_dlp.YoutubeDL(ydl_opts) as ydl: 74 | ydl.download([url]) 75 | break 76 | except yt_dlp.utils.DownloadError as e: 77 | if "is not a valid URL" in str(e): 78 | print("Erro: o link inserido não é válido.") 79 | url = input("\nPor favor, insira um link válido: ") 80 | else: 81 | raise 82 | 83 | return final_video_path, project_folder -------------------------------------------------------------------------------- /scripts/organize_output.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import shutil 4 | import re 5 | from i18n.i18n import I18nAuto 6 | 7 | i18n = I18nAuto() 8 | 9 | def sanitize_filename(name): 10 | """Remove caracteres inválidos para nomes de arquivos/pastas.""" 11 | # Remove caracteres inválidos como / \ : * ? " < > | 12 | cleaned = re.sub(r'[\\/*?:"<>|]', "", name) 13 | # Remove espaços extras e quebras de linha 14 | cleaned = cleaned.strip() 15 | return cleaned 16 | 17 | def organize(): 18 | print(i18n("Organizing output files...")) 19 | 20 | # Caminhos 21 | meta_path = "tmp/viral_segments.txt" 22 | burned_folder = "burned_sub" 23 | virals_root = "VIRALS" 24 | 25 | if not os.path.exists(meta_path): 26 | print(i18n("Metadata file not found: ") + meta_path) 27 | return 28 | 29 | try: 30 | with open(meta_path, 'r', encoding='utf-8') as f: 31 | data = json.load(f) 32 | segments = data.get("segments", []) 33 | except Exception as e: 34 | print(i18n("Error reading metadata: ") + str(e)) 35 | return 36 | 37 | os.makedirs(virals_root, exist_ok=True) 38 | 39 | processed_count = 0 40 | 41 | for i, segment in enumerate(segments): 42 | title = segment.get("title", f"Viral_Segment_{i+1}") 43 | clean_title = sanitize_filename(title) 44 | 45 | # Se o título estiver vazio após sanitização, usa fallback 46 | if not clean_title: 47 | clean_title = f"Viral_Segment_{i+1}" 48 | 49 | # Cria pasta do viral 50 | viral_folder = os.path.join(virals_root, clean_title) 51 | os.makedirs(viral_folder, exist_ok=True) 52 | 53 | # Identifica o arquivo de vídeo final 54 | # Padrão esperado: outputXXX_original_scale_subtitled.mp4 55 | # O padrão pode variar dependendo de como o burn_subtitles foi executado, mas geralmente segue o index 56 | # Vamos tentar localizar pelo padrão de índice 57 | 58 | video_filename_pattern = f"output{str(i).zfill(3)}_original_scale_subtitled.mp4" 59 | source_video = os.path.join(burned_folder, video_filename_pattern) 60 | 61 | # Se não encontrar com subtitled, tenta sem (caso burn tenha sido pulado?) 
62 | if not os.path.exists(source_video): 63 | # Tenta na pasta 'final' se não tiver legenda queimada 64 | source_video_final = os.path.join("final", f"output{str(i).zfill(3)}_original_scale.mp4") 65 | if os.path.exists(source_video_final): 66 | source_video = source_video_final 67 | else: 68 | # Tenta padrao sem 'original_scale' ou outras variações se necessário 69 | print(i18n(f"Warning: Could not find video file for segment {i+1} ({title})")) 70 | continue 71 | 72 | # Define caminhos finais 73 | target_video = os.path.join(viral_folder, f"{clean_title}.mp4") 74 | target_json = os.path.join(viral_folder, f"{clean_title}.json") 75 | 76 | # Mover/Copiar Vídeo 77 | try: 78 | shutil.copy2(source_video, target_video) 79 | except Exception as e: 80 | print(i18n(f"Error copying video for segment {i}: {e}")) 81 | continue 82 | 83 | # Salvar JSON individual 84 | try: 85 | with open(target_json, 'w', encoding='utf-8') as f: 86 | json.dump(segment, f, ensure_ascii=False, indent=4) 87 | except Exception as e: 88 | print(i18n(f"Error saving JSON for segment {i}: {e}")) 89 | 90 | processed_count += 1 91 | print(i18n(f"Saved: {clean_title}")) 92 | 93 | print(i18n(f"Organization completed. {processed_count} virals saved in '{virals_root}' folder.")) 94 | 95 | if __name__ == "__main__": 96 | organize() 97 | -------------------------------------------------------------------------------- /scripts/burn_subtitles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | 5 | sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) 6 | 7 | def burn(project_folder="tmp"): 8 | # Converter para absoluto para não ter erro no filtro do ffmpeg 9 | if project_folder and not os.path.isabs(project_folder): 10 | project_folder_abs = os.path.abspath(project_folder) 11 | else: 12 | project_folder_abs = project_folder 13 | 14 | # Caminhos das pastas 15 | subs_folder = os.path.join(project_folder_abs, 'subs_ass') 16 | videos_folder = os.path.join(project_folder_abs, 'final') 17 | output_folder = os.path.join(project_folder_abs, 'burned_sub') # Pasta para salvar os vídeos com legendas 18 | 19 | # Cria a pasta de saída se não existir 20 | os.makedirs(output_folder, exist_ok=True) 21 | 22 | if not os.path.exists(videos_folder): 23 | print(f"Pasta de vídeos finais não encontrada: {videos_folder}") 24 | return 25 | 26 | # Itera sobre os arquivos de vídeo na pasta final 27 | files = os.listdir(videos_folder) 28 | if not files: 29 | print("Nenhum arquivo encontrado em 'final' para queimar legendas.") 30 | return 31 | 32 | for video_file in files: 33 | if video_file.endswith(('.mp4', '.mkv', '.avi')): # Formatos suportados 34 | # Se for temp file (ex: temp_video_no_audio), ignora se existir a versão final 35 | if "temp_video_no_audio" in video_file: 36 | continue 37 | 38 | # Extrai o nome base do vídeo (sem extensão) 39 | video_name = os.path.splitext(video_file)[0] 40 | 41 | # O edit_video gera 'final-outputXXX_processed'. 42 | # O transcribe_cuts gera SRT/JSON com base nisso. 43 | # O adjust gera ASS com base no JSON. 44 | # Então o nome deve bater. 
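# Exemplo (ilustrativo): para um corte "final-output000_processed.mp4" na pasta 'final',
# a legenda esperada é "subs_ass/final-output000_processed.ass" e a saída gerada
# será "burned_sub/final-output000_processed_subtitled.mp4".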
45 | 46 | # Define o caminho para a legenda correspondente 47 | subtitle_file = os.path.join(subs_folder, f"{video_name}.ass") 48 | 49 | # Verifica se a legenda existe 50 | if os.path.exists(subtitle_file): 51 | # Define o caminho de saída para o vídeo com legendas 52 | output_file = os.path.join(output_folder, f"{video_name}_subtitled.mp4") 53 | 54 | # Ajuste no caminho da legenda para FFmpeg (Forward Slash e escape de :) 55 | # No Windows, "C:/foo" funciona se estiver entre aspas simples dentro do filtro. 56 | # Para garantir, usamos replace e forward slashes. 57 | subtitle_file_ffmpeg = subtitle_file.replace('\\', '/').replace(':', '\\:') 58 | 59 | # Comando FFmpeg para adicionar as legendas 60 | command = [ 61 | "ffmpeg", "-y", "-loglevel", "error", "-hide_banner", 62 | '-i', os.path.join(videos_folder, video_file), # Vídeo de entrada 63 | '-vf', f"subtitles='{subtitle_file_ffmpeg}'", # Filtro de legendas 64 | '-c:v', 'h264_nvenc', # Codificador NVIDIA 65 | '-preset', 'p1', # Preset para velocidade 66 | '-b:v', '5M', # Bitrate 67 | '-c:a', 'copy', # Copia o áudio 68 | output_file 69 | ] 70 | 71 | # Log 72 | print(f"Processando vídeo: {video_file}") 73 | # print(f"Comando: {' '.join(command)}") 74 | 75 | # Executa o comando 76 | try: 77 | subprocess.run(command, check=True, capture_output=True) 78 | print(f"Processado: {output_file}") 79 | except subprocess.CalledProcessError as e: 80 | print(f"Erro ao queimar legendas em {video_name}: {e}") 81 | else: 82 | print(f"Legenda não encontrada para: {video_name} em {subtitle_file}") 83 | 84 | -------------------------------------------------------------------------------- /scripts/one_face.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import subprocess 5 | import mediapipe as mp 6 | 7 | def crop_and_resize_single_face(frame, face): 8 | frame_height, frame_width = frame.shape[:2] 9 | 10 | x, y, w, h = face 11 | face_center_x = x + w // 2 12 | face_center_y = y + h // 2 13 | 14 | # Cálculo da proporção desejada (9:16) 15 | target_aspect_ratio = 9 / 16 16 | 17 | # Cálculo da área de corte para evitar barras pretas 18 | if frame_width / frame_height > target_aspect_ratio: 19 | new_width = int(frame_height * target_aspect_ratio) 20 | new_height = frame_height 21 | else: 22 | new_width = frame_width 23 | new_height = int(frame_width / target_aspect_ratio) 24 | 25 | # Garantir que o corte esteja dentro dos limites 26 | crop_x = max(0, min(face_center_x - new_width // 2, frame_width - new_width)) 27 | crop_y = max(0, min(face_center_y - new_height // 2, frame_height - new_height)) 28 | crop_x2 = crop_x + new_width 29 | crop_y2 = crop_y + new_height 30 | 31 | # Recorte e redimensionamento para 1080x1920 (9:16) 32 | crop_img = frame[crop_y:crop_y2, crop_x:crop_x2] 33 | resized = cv2.resize(crop_img, (1080, 1920), interpolation=cv2.INTER_AREA) 34 | 35 | return resized 36 | 37 | def resize_with_padding(frame): 38 | frame_height, frame_width = frame.shape[:2] 39 | target_aspect_ratio = 9 / 16 40 | 41 | if frame_width / frame_height > target_aspect_ratio: 42 | new_width = frame_width 43 | new_height = int(frame_width / target_aspect_ratio) 44 | else: 45 | new_height = frame_height 46 | new_width = int(frame_height * target_aspect_ratio) 47 | 48 | # Criação de uma tela preta 49 | result = np.zeros((new_height, new_width, 3), dtype=np.uint8) 50 | 51 | # Cálculo das margens 52 | pad_top = (new_height - frame_height) // 2 53 | pad_left = (new_width - frame_width) // 
2 54 | 55 | # Colocar o frame original na tela 56 | result[pad_top:pad_top+frame_height, pad_left:pad_left+frame_width] = frame 57 | 58 | # Redimensionar para as dimensões finais 59 | return cv2.resize(result, (1080, 1920), interpolation=cv2.INTER_AREA) 60 | 61 | def detect_face_or_body(frame, face_detection, face_mesh, pose): 62 | # Converter a imagem para RGB 63 | frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 64 | 65 | # Processar a detecção de rosto 66 | results_face_detection = face_detection.process(frame_rgb) 67 | results_face_mesh = face_mesh.process(frame_rgb) 68 | results_pose = pose.process(frame_rgb) 69 | 70 | detections = [] 71 | 72 | # Usar a detecção de rosto se disponível 73 | if results_face_detection.detections: 74 | # Usar o primeiro rosto detectado 75 | detection = results_face_detection.detections[0] 76 | bbox = detection.location_data.relative_bounding_box 77 | x_min = int(bbox.xmin * frame.shape[1]) 78 | y_min = int(bbox.ymin * frame.shape[0]) 79 | width = int(bbox.width * frame.shape[1]) 80 | height = int(bbox.height * frame.shape[0]) 81 | detections.append((x_min, y_min, width, height)) 82 | 83 | # Usar landmarks do face mesh se disponível 84 | if results_face_mesh.multi_face_landmarks: 85 | landmarks = results_face_mesh.multi_face_landmarks[0].landmark 86 | # Coordenadas do rosto baseadas nos pontos-chave (landmarks) 87 | x_coords = [int(landmark.x * frame.shape[1]) for landmark in landmarks] 88 | y_coords = [int(landmark.y * frame.shape[0]) for landmark in landmarks] 89 | x_min, x_max = min(x_coords), max(x_coords) 90 | y_min, y_max = min(y_coords), max(y_coords) 91 | width = x_max - x_min 92 | height = y_max - y_min 93 | detections.append((x_min, y_min, width, height)) 94 | 95 | # Se nenhum rosto for detectado, usar a pose para estimar o corpo 96 | if results_pose.pose_landmarks: 97 | x_coords = [lmk.x for lmk in results_pose.pose_landmarks.landmark] 98 | y_coords = [lmk.y for lmk in results_pose.pose_landmarks.landmark] 99 | x_min = int(min(x_coords) * frame.shape[1]) 100 | x_max = int(max(x_coords) * frame.shape[1]) 101 | y_min = int(min(y_coords) * frame.shape[0]) 102 | y_max = int(max(y_coords) * frame.shape[0]) 103 | width = x_max - x_min 104 | height = y_max - y_min 105 | detections.append((x_min, y_min, width, height)) 106 | 107 | # Se nada for detectado, retornar uma lista vazia 108 | return detections if detections else None 109 | 110 | -------------------------------------------------------------------------------- /scripts/face_detection_insightface.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import sys 5 | from contextlib import contextmanager 6 | import warnings 7 | 8 | # Suppress warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | try: 12 | from insightface.app import FaceAnalysis 13 | INSIGHTFACE_AVAILABLE = True 14 | except ImportError: 15 | INSIGHTFACE_AVAILABLE = False 16 | 17 | app = None 18 | 19 | @contextmanager 20 | def suppress_stdout_stderr(): 21 | """A context manager that redirects stdout and stderr to devnull""" 22 | with open(os.devnull, "w") as devnull: 23 | old_stdout = sys.stdout 24 | old_stderr = sys.stderr 25 | sys.stdout = devnull 26 | sys.stderr = devnull 27 | try: 28 | yield 29 | finally: 30 | sys.stdout = old_stdout 31 | sys.stderr = old_stderr 32 | 33 | def init_insightface(): 34 | """Explicit initialization if needed outside import.""" 35 | global app 36 | if not INSIGHTFACE_AVAILABLE: 37 | raise 
ImportError("InsightFace not installed. Please install it.") 38 | 39 | if app is None: 40 | # Provider options to reduce logging if possible (often needs env var) 41 | # But redirection is safer for C++ logs 42 | providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] 43 | 44 | with suppress_stdout_stderr(): 45 | app = FaceAnalysis(name='buffalo_l', providers=providers) 46 | app.prepare(ctx_id=0, det_size=(640, 640)) 47 | return app 48 | 49 | def detect_faces_insightface(frame): 50 | """ 51 | Detect faces using InsightFace. 52 | Returns a list of dicts with 'bbox' and 'kps'. 53 | bbox is [x1, y1, x2, y2], kps is 5 keypoints (eyes, nose, mouth corners). 54 | """ 55 | global app 56 | if app is None: 57 | init_insightface() 58 | 59 | faces = app.get(frame) 60 | results = [] 61 | for face in faces: 62 | # Convert bbox to int 63 | bbox = face.bbox.astype(int) 64 | results.append({ 65 | 'bbox': bbox, # [x1, y1, x2, y2] 66 | 'kps': face.kps, 67 | 'det_score': face.det_score 68 | }) 69 | return results 70 | 71 | def crop_and_resize_insightface(frame, face_bbox, target_width=1080, target_height=1920): 72 | """ 73 | Crops and resizes the frame to target dimensions centered on the face_bbox. 74 | face_bbox: [x1, y1, x2, y2] 75 | """ 76 | h, w, _ = frame.shape 77 | x1, y1, x2, y2 = face_bbox 78 | 79 | face_center_x = (x1 + x2) // 2 80 | face_center_y = (y1 + y2) // 2 81 | 82 | # Calculate crop area based on target aspect ratio and face position 83 | # We want to keep the face roughly in the upper-middle or center? 84 | # Usually center for simple implementation, or slightly upper for "talking head". 85 | 86 | # Logic similar to one_face.py but adapted 87 | 88 | # Determine the scaling factor to ensure the crop covers the target height 89 | # Ideally we want the height of the video to match the target height after resize 90 | # But usually we source from landscape (16:9) to portrait (9:16). 91 | # We need to crop a 9:16 area from the source. 92 | 93 | # Calculate source crop height/width maintaining 9:16 ratio 94 | # Trying to maximize height usage of the source frame usually. 
95 | 96 | # Let's say we want to use the full height of the source if possible 97 | source_h = h 98 | source_w = int(source_h * (target_width / target_height)) 99 | 100 | if source_w > w: 101 | # If the calculated width is wider than the source image, we are limited by width 102 | source_w = w 103 | source_h = int(source_w * (target_height / target_width)) 104 | 105 | # Calculate top-left corner of the crop 106 | crop_x1 = face_center_x - (source_w // 2) 107 | crop_y1 = face_center_y - (source_h // 2) # Center vertically on face 108 | 109 | # Adjust to stay within bounds 110 | if crop_x1 < 0: 111 | crop_x1 = 0 112 | elif crop_x1 + source_w > w: 113 | crop_x1 = w - source_w 114 | 115 | if crop_y1 < 0: 116 | crop_y1 = 0 117 | elif crop_y1 + source_h > h: 118 | crop_y1 = h - source_h 119 | 120 | crop_x2 = crop_x1 + source_w 121 | crop_y2 = crop_y1 + source_h 122 | 123 | # Crop 124 | cropped = frame[crop_y1:crop_y2, crop_x1:crop_x2] 125 | 126 | # Resize to final target 127 | result = cv2.resize(cropped, (target_width, target_height), interpolation=cv2.INTER_LINEAR) 128 | 129 | return result 130 | 131 | if __name__ == "__main__": 132 | # Test block 133 | print("Testing InsightFace...") 134 | # Create a dummy image or try to load one if available, but for now just print config 135 | print("InsightFace initialized.") 136 | -------------------------------------------------------------------------------- /i18n/locale/en_US.json: -------------------------------------------------------------------------------- 1 | { 2 | "main": "main", 3 | "Burn only mode activated. Skipping to subtitle burning...": "Burn only mode activated. Skipping to subtitle burning...", 4 | "Subtitle burning completed.": "Subtitle burning completed.", 5 | "Enter the YouTube video URL (or press Enter to use latest project): ": "Enter the YouTube video URL (or press Enter to use latest project): ", 6 | "Enter the number of viral segments to create: ": "Enter the number of viral segments to create: ", 7 | "\nError: Number must be greater than 0.": "\nError: Number must be greater than 0.", 8 | "Error: The value you entered is not an integer. Please try again.": "Error: The value you entered is not an integer. Please try again.", 9 | "Do you want viral mode? (yes/no): ": "Do you want viral mode? (yes/no): ", 10 | "Enter themes (comma-separated, leave blank if viral mode is True): ": "Enter themes (comma-separated, leave blank if viral mode is True): ", 11 | "Subtitle burning skipped.": "Subtitle burning skipped.", 12 | "Process completed successfully!": "Process completed successfully!", 13 | "Using latest project: {}": "Using latest project: {}", 14 | "Latest project found but 'input.mp4' is missing.": "Latest project found but 'input.mp4' is missing.", 15 | "No existing projects found in VIRALS folder.": "No existing projects found in VIRALS folder.", 16 | "VIRALS folder not found. Cannot load latest project.": "VIRALS folder not found. Cannot load latest project.", 17 | "\nExisting viral segments found: {}": "\nExisting viral segments found: {}", 18 | "Use existing viral segments? (yes/no) [default: yes]: ": "Use existing viral segments? (yes/no) [default: yes]: ", 19 | "Loaded existing viral segments. Skipping configuration prompts.": "Loaded existing viral segments. Skipping configuration prompts.", 20 | "Error loading JSON: {}.": "Error loading JSON: {}.", 21 | "\nCurrent duration settings: {}s - {}s": "\nCurrent duration settings: {}s - {}s", 22 | "Change duration? (y/n) [default: n]: ": "Change duration? 
(y/n) [default: n]: ", 23 | "Minimum duration [{}]: ": "Minimum duration [{}]: ", 24 | "Maximum duration [{}]: ": "Maximum duration [{}]: ", 25 | "Invalid number. Using previous values.": "Invalid number. Using previous values.", 26 | "Using AI Backend from config: {}": "Using AI Backend from config: {}", 27 | "Select AI Backend for Viral Analysis:": "Select AI Backend for Viral Analysis:", 28 | "1. Gemini API (Best / Recommended)": "1. Gemini API (Best / Recommended)", 29 | "2. G4F (Free / Experimental)": "2. G4F (Free / Experimental)", 30 | "3. Manual (Copy/Paste Prompt)": "3. Manual (Copy/Paste Prompt)", 31 | "Choose (1/2/3): ": "Choose (1/2/3): ", 32 | "Gemini API Key not found in api_config.json or arguments.": "Gemini API Key not found in api_config.json or arguments.", 33 | "Enter your Gemini API Key: ": "Enter your Gemini API Key: ", 34 | "Select Processing Workflow:": "Select Processing Workflow:", 35 | "1. Full Processing (Face Crop 9:16 + Subtitles) [Default]": "1. Full Processing (Face Crop 9:16 + Subtitles) [Default]", 36 | "2. Cut Only (Keep Original Size, No Subtitles)": "2. Cut Only (Keep Original Size, No Subtitles)", 37 | "Choose (1/2): ": "Choose (1/2): ", 38 | "Select Face Detection Model:": "Select Face Detection Model:", 39 | "1. InsightFace (Recommended - High Accuracy, GPU Support) [Default]": "1. InsightFace (Recommended - High Accuracy, GPU Support) [Default]", 40 | "2. MediaPipe (Google - Fast, lightweight)": "2. MediaPipe (Google - Fast, lightweight)", 41 | "Select Face Tracking Mode:": "Select Face Tracking Mode:", 42 | "1. Auto (Detect 1 or 2 faces dynamically) [Default]": "1. Auto (Detect 1 or 2 faces dynamically) [Default]", 43 | "2. 1 Face (Focus on largest face)": "2. 1 Face (Focus on largest face)", 44 | "3. 2 Faces (Split Screen)": "3. 2 Faces (Split Screen)", 45 | "Error: No URL provided and no existing video selected.": "Error: No URL provided and no existing video selected.", 46 | "Starting download...": "Starting download...", 47 | "Transcribing with model {}...": "Transcribing with model {}...", 48 | "Creating viral segments using {}...": "Creating viral segments using {}...", 49 | "Cuts already exist. Cut again? (yes/no) [default: no]: ": "Cuts already exist. Cut again? (yes/no) [default: no]: ", 50 | "Skipping Video Rendering (using existing cuts), but updating Subtitle JSONs...": "Skipping Video Rendering (using existing cuts), but updating Subtitle JSONs...", 51 | "Cutting segments...": "Cutting segments...", 52 | "Cut Only selected. Skipping Face Crop and Subtitles.": "Cut Only selected. Skipping Face Crop and Subtitles.", 53 | "Process completed! Check your results in: {}": "Process completed! Check your results in: {}", 54 | "Editing video with {} (Mode: {})...": "Editing video with {} (Mode: {})...", 55 | "Processing subtitles...": "Processing subtitles...", 56 | "Configuring subtitles for Split Screen (Center Position)...": "Configuring subtitles for Split Screen (Center Position)...", 57 | "\nExisting cuts found in: {}": "\nExisting cuts found in: {}", 58 | "\nAn error occurred: {}": "\nAn error occurred: {}" 59 | } -------------------------------------------------------------------------------- /i18n/locale/pt_BR.json: -------------------------------------------------------------------------------- 1 | { 2 | "main": "main", 3 | "Burn only mode activated. Skipping to subtitle burning...": "Modo de apenas queimar legendas ativado. 
Pulando para a queima de legendas...", 4 | "Subtitle burning completed.": "Queima de legendas concluída.", 5 | "Enter the YouTube video URL (or press Enter to use latest project): ": "Digite a URL do vídeo do YouTube (ou pressione Enter para usar o projeto mais recente): ", 6 | "Enter the number of viral segments to create: ": "Digite o número de segmentos virais para criar: ", 7 | "\nError: Number must be greater than 0.": "\nErro: O número deve ser maior que 0.", 8 | "Error: The value you entered is not an integer. Please try again.": "Erro: O valor que você digitou não é um número inteiro. Tente novamente.", 9 | "Do you want viral mode? (yes/no): ": "Você quer o modo viral? (sim/não): ", 10 | "Enter themes (comma-separated, leave blank if viral mode is True): ": "Digite temas (separados por vírgula, deixe em branco se o modo viral for Verdadeiro): ", 11 | "Subtitle burning skipped.": "Queima de legendas ignorada.", 12 | "Process completed successfully!": "Processo concluído com sucesso!", 13 | "Using latest project: {}": "Usando o projeto mais recente: {}", 14 | "Latest project found but 'input.mp4' is missing.": "Projeto mais recente encontrado, mas 'input.mp4' está faltando.", 15 | "No existing projects found in VIRALS folder.": "Nenhum projeto existente encontrado na pasta VIRALS.", 16 | "VIRALS folder not found. Cannot load latest project.": "Pasta VIRALS não encontrada. Não é possível carregar o projeto mais recente.", 17 | "\nExisting viral segments found: {}": "\nSegmentos virais existentes encontrados: {}", 18 | "Use existing viral segments? (yes/no) [default: yes]: ": "Usar segmentos virais existentes? (sim/não) [padrão: sim]: ", 19 | "Loaded existing viral segments. Skipping configuration prompts.": "Segmentos virais existentes carregados. Pulando prompts de configuração.", 20 | "Error loading JSON: {}.": "Erro ao carregar JSON: {}.", 21 | "\nCurrent duration settings: {}s - {}s": "\nConfigurações de duração atuais: {}s - {}s", 22 | "Change duration? (y/n) [default: n]: ": "Alterar duração? (s/n) [padrão: n]: ", 23 | "Minimum duration [{}]: ": "Duração mínima [{}]: ", 24 | "Maximum duration [{}]: ": "Duração máxima [{}]: ", 25 | "Invalid number. Using previous values.": "Número inválido. Usando valores anteriores.", 26 | "Using AI Backend from config: {}": "Usando Backend de IA da configuração: {}", 27 | "Select AI Backend for Viral Analysis:": "Selecione o Backend de IA para Análise Viral:", 28 | "1. Gemini API (Best / Recommended)": "1. Gemini API (Melhor / Recomendado)", 29 | "2. G4F (Free / Experimental)": "2. G4F (Grátis / Experimental)", 30 | "3. Manual (Copy/Paste Prompt)": "3. Manual (Copiar/Colar Prompt)", 31 | "Choose (1/2/3): ": "Escolha (1/2/3): ", 32 | "Gemini API Key not found in api_config.json or arguments.": "Chave da API Gemini não encontrada em api_config.json ou argumentos.", 33 | "Enter your Gemini API Key: ": "Digite sua chave de API Gemini: ", 34 | "Select Processing Workflow:": "Selecione o Fluxo de Trabalho de Processamento:", 35 | "1. Full Processing (Face Crop 9:16 + Subtitles) [Default]": "1. Processamento Completo (Recorte de Rosto 9:16 + Legendas) [Padrão]", 36 | "2. Cut Only (Keep Original Size, No Subtitles)": "2. Apenas Cortar (Manter Tamanho Original, Sem Legendas)", 37 | "Choose (1/2): ": "Escolha (1/2): ", 38 | "Select Face Detection Model:": "Selecione o Modelo de Detecção de Rosto:", 39 | "1. InsightFace (Recommended - High Accuracy, GPU Support) [Default]": "1. InsightFace (Recomendado - Alta Precisão, Suporte a GPU) [Padrão]", 40 | "2. 
MediaPipe (Google - Fast, lightweight)": "2. MediaPipe (Google - Rápido, leve)", 41 | "Select Face Tracking Mode:": "Selecione o Modo de Rastreamento de Rosto:", 42 | "1. Auto (Detect 1 or 2 faces dynamically) [Default]": "1. Auto (Detectar 1 ou 2 rostos dinamicamente) [Padrão]", 43 | "2. 1 Face (Focus on largest face)": "2. 1 Rosto (Focar no maior rosto)", 44 | "3. 2 Faces (Split Screen)": "3. 2 Rostos (Tela Dividida)", 45 | "Error: No URL provided and no existing video selected.": "Erro: Nenhuma URL fornecida e nenhum vídeo existente selecionado.", 46 | "Starting download...": "Iniciando download...", 47 | "Transcribing with model {}...": "Transcrevendo com modelo {}...", 48 | "Creating viral segments using {}...": "Criando segmentos virais usando {}...", 49 | "Cuts already exist. Cut again? (yes/no) [default: no]: ": "Cortes já existem. Cortar novamente? (sim/não) [padrão: não]: ", 50 | "Skipping Video Rendering (using existing cuts), but updating Subtitle JSONs...": "Pulando Renderização de Vídeo (usando cortes existentes), mas atualizando JSONs de Legenda...", 51 | "Cutting segments...": "Cortando segmentos...", 52 | "Cut Only selected. Skipping Face Crop and Subtitles.": "Selecionado Apenas Cortar. Pulando Recorte de Rosto e Legendas.", 53 | "Process completed! Check your results in: {}": "Processo concluído! Verifique seus resultados em: {}", 54 | "Editing video with {} (Mode: {})...": "Editando vídeo com {} (Modo: {})...", 55 | "Processing subtitles...": "Processando legendas...", 56 | "Configuring subtitles for Split Screen (Center Position)...": "Configurando legendas para Tela Dividida (Posição Central)...", 57 | "\nExisting cuts found in: {}": "\nCortes existentes encontrados em: {}", 58 | "\nAn error occurred: {}": "\nOcorreu um erro: {}" 59 | } -------------------------------------------------------------------------------- /README_en.md: -------------------------------------------------------------------------------- 1 | # ViralCutter: Viral Video Generator 2 | [ ![](https://dcbadge.vercel.app/api/server/aihubbrasil) ](https://discord.gg/aihubbrasil)
3 | [English](https://github.com/RafaelGodoyEbert/ViralCutter/blob/main/README_en.md) | [Português](https://github.com/RafaelGodoyEbert/ViralCutter/blob/main/README.md) 4 | 5 | ## **Description** 6 | ViralCutter is an innovative tool designed to generate viral videos from existing content. With advanced video and audio processing techniques, ViralCutter cuts and edits video segments that are perfect for sharing on social media. Using the WhisperX model for transcription and automatic caption generation, it adapts videos to the 9:16 (vertical) format, ideal for platforms like TikTok, Instagram Reels, and YouTube Shorts. 7 | 8 | ## **What's New & Updates (Changelog)** 9 | 10 | Check out the latest improvements: 11 | 12 | - **Performance Optimization**: Transcription "slicing" implemented. The video is transcribed only once, and cuts reuse the data, eliminating reprocessing. 13 | - **Flexible AI Support**: Native integration with **Gemini API** and experimental support for **G4F** (GPT-4 Free), plus a Manual mode. 14 | - **External Configuration**: `api_config.json` and `prompt.txt` files for easy customization without touching the code. 15 | - **Face Fix**: MediaPipe fix for precise face tracking without relying on "Center Crop". 16 | - **Subtitle Improvements**: Smart positioning for 2-face videos (split screen) and style corrections. 17 | 18 | *(See [changelog.md](changelog.md) for full details)* 19 | 20 | ## **Features** 21 | 22 | - **Video Download**: Downloads YouTube videos via a provided URL. 23 | - **Audio Transcription**: Converts audio to text using the WhisperX model. 24 | - **Viral Segment Identification**: Uses AI to detect parts of the video with high viral potential. 25 | - **Cutting & Formatting**: Cuts selected segments and adjusts the aspect ratio to 9:16. 26 | - **Smart Cropping**: Keeps the speaker in focus (Face Tracking) or uses automatic Split Screen (2-Faces) mode. 27 | - **Audio/Video Merging**: Combines transcribed audio with processed video clips. 28 | - **Batch Export**: Generates a ZIP file with all created viral videos, facilitating download and sharing. 29 | - **Custom Captions**: Create custom captions with colors, highlights, no highlights, or word-by-word styles, offering extensive editing possibilities. 30 | 31 | 32 | ## **How to Use** 33 | 37 | 38 | - Open the link and follow the steps in order (Portuguese only, sorry): [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1UZKzeqjIeEyvq9nPx7s_4mU6xlkZQn_R?usp=sharing#scrollTo=pa36OeArowme)
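If you prefer to run ViralCutter locally instead of Colab, the **Installation and Local Usage** section below covers the full setup; once the dependencies are installed, a minimal run (using the CLI flags documented below) looks like:

```bash
python main_improved.py --url "https://youtu.be/EXAMPLE" --segments 3
```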
39 | 40 | 41 | ## **Limitations** 42 | 43 | - The quality of generated videos may vary based on the quality of the original video. 44 | - Processing time depends heavily on your GPU. 45 | - The **G4F** model may be unstable or have request limits. Use **Gemini** for greater stability (requires an api_key). 46 | 47 | ## Inspiration 48 | This project was inspired by the following repositories: 49 | 50 | * [Reels Clips Automator](https://github.com/eddieoz/reels-clips-automator) 51 | * [YoutubeVideoToAIPoweredShorts](https://github.com/Fitsbit/YoutubeVideoToAIPoweredShorts) 52 | 53 | ## TODO📝 54 | - [x] Release code 55 | - [ ] Huggingface SpaceDemo 56 | - [x] Two face in the cut 57 | - [x] Custom caption and burn 58 | - [x] Make the code faster 59 | - [ ] More types of framing beyond 9:16 60 | - [x] The cut follows the face as it moves 61 | - [ ] Automatic translation 62 | - [ ] Satisfying video on the side 63 | - [ ] Background music 64 | - [ ] Watermark at user's choice 65 | - [ ] Upload directly to YouTube channel 66 | 67 | ## Examples 68 | ### Viral video example `with active highlight` [compressed to fit GitHub] 69 | https://github.com/user-attachments/assets/dd9a7039-e0f3-427a-a6e1-f50ab5029082 70 | 71 | ### Opus Clip vs ViralCutter example [compressed to fit GitHub] 72 | https://github.com/user-attachments/assets/12916792-dc0e-4f63-a76b-5698946f50f4 73 | 74 | ### 2-Face example [compressed to fit GitHub] 75 | https://github.com/user-attachments/assets/ca7ebb9c-52ba-4171-a513-625bef690a2b 76 | 77 | ## **Installation and Local Usage** 78 | 79 | ### Prerequisites 80 | - Python 3.10+ 81 | - FFmpeg installed and in the system PATH. 82 | - NVIDIA GPU recommended (with CUDA installed) for WhisperX. 83 | 84 | ### Configuration 85 | 1. **Install dependencies**: 86 | ```bash 87 | pip install -r requirements.txt 88 | ``` 89 | *(Note: WhisperX and Torch may require specific installation instructions for your CUDA version)*. 90 | 91 | 2. **Configure API (Optional but Recommended)**: 92 | Edit the `api_config.json` file in the root folder: 93 | ```json 94 | { 95 | "selected_api": "gemini", 96 | "gemini": { 97 | "api_key": "YOUR_KEY_HERE" 98 | } 99 | } 100 | ``` 101 | 102 | ### Running 103 | 104 | #### Interactive Mode (Simple) 105 | Just run the script and follow the on-screen instructions: 106 | ```bash 107 | python main_improved.py 108 | ``` 109 | 110 | #### CLI Mode (Advanced) 111 | You can pass all arguments via command line for automation: 112 | 113 | ```bash 114 | python main_improved.py --url "https://youtu.be/EXAMPLE" --segments 3 --ai-backend gemini --model large-v3-turbo 115 | ``` 116 | 117 | **Main Arguments:** 118 | - `--url`: YouTube video URL. 119 | - `--segments`: Number of cuts to generate. 120 | - `--ai-backend`: `gemini` (Recommended), `g4f`, or `manual`. 121 | - `--viral`: Activates automatic viral search mode. 122 | - `--face-mode`: `auto`, `1` (one face), or `2` (two faces/split). 123 | - `--workflow`: `1` (Full) or `2` (Cut Only, no captions/crop). 124 | 125 | --- 126 | 127 | ## **Contributions** 128 | Want to help make ViralCutter even better? If you have suggestions or want to contribute to the code, feel free to open an issue or submit a pull request on our GitHub repository. 129 | 130 | ## **Version** 131 | `0.7v Alpha` 132 | A free alternative to `opus.pro` and `vidyo.ai`. 
133 | 134 | --- -------------------------------------------------------------------------------- /scripts/two_face.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import mediapipe as mp 3 | import numpy as np 4 | 5 | def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2.2): 6 | """ 7 | Recorta uma região baseada no rosto mantendo o aspect ratio do target. 8 | Previne deformação (esticar/espremer). 9 | """ 10 | img_h, img_w, _ = frame.shape 11 | x, y, w, h = face_box 12 | 13 | # Centro do rosto 14 | cx = x + w // 2 15 | cy = y + h // 2 16 | 17 | # Dimensão base do rosto (maior lado para garantir cobertura) 18 | face_size = max(w, h) 19 | 20 | # Altura desejada do crop (altura do rosto * fator de zoom/afastamento) 21 | # zoom_out_factor: quanto maior, mais afastado (mais cenário) 22 | req_h = face_size * zoom_out_factor 23 | 24 | # Aspect Ratio alvo (1080 / 960 = 1.125) 25 | target_ar = target_w / target_h 26 | 27 | # Calcular largura e altura do crop mantendo AR 28 | crop_h = req_h 29 | crop_w = crop_h * target_ar 30 | 31 | # Verificar limitações da imagem original (não podemos cortar mais que existe) 32 | # Se a largura necessária for maior que a imagem, limitamos pela largura 33 | if crop_w > img_w: 34 | crop_w = float(img_w) 35 | crop_h = crop_w / target_ar 36 | 37 | # Se a altura necessária for maior que a imagem, limitamos pela altura 38 | if crop_h > img_h: 39 | crop_h = float(img_h) 40 | crop_w = crop_h * target_ar 41 | 42 | # Converter para inteiros 43 | crop_w = int(crop_w) 44 | crop_h = int(crop_h) 45 | 46 | # Calcular coordenadas top-left do crop centralizado no rosto 47 | x1 = int(cx - crop_w // 2) 48 | y1 = int(cy - crop_h // 2) 49 | 50 | # Ajuste de bordas (Clamp) deslisando a janela se possível 51 | # Se sair pela esquerda, encosta na esquerda 52 | if x1 < 0: 53 | x1 = 0 54 | # Se sair pela direita, encosta na direita 55 | elif x1 + crop_w > img_w: 56 | x1 = img_w - crop_w 57 | 58 | # Se sair por cima 59 | if y1 < 0: 60 | y1 = 0 61 | # Se sair por baixo 62 | elif y1 + crop_h > img_h: 63 | y1 = img_h - crop_h 64 | 65 | # Verificação de segurança final se a imagem for menor que o crop (embora lógica acima evite) 66 | x2 = x1 + crop_w 67 | y2 = y1 + crop_h 68 | 69 | # Crop 70 | cropped = frame[y1:y2, x1:x2] 71 | 72 | # Se o crop falhar (tamanho 0), retorna preto 73 | if cropped.size == 0 or cropped.shape[0] == 0 or cropped.shape[1] == 0: 74 | return np.zeros((target_h, target_w, 3), dtype=np.uint8) 75 | 76 | # Redimensionar para o tamanho alvo final (1080x960) 77 | # Como garantimos o AR, o resize mantém a proporção correta 78 | resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR) 79 | return resized 80 | 81 | def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2): 82 | """ 83 | Recorta e redimensiona dois rostos detectados no frame, ajustando para uma composição vertical 84 | 1080x1920 onde cada rosto ocupa metade da tela (1080x960). 
85 | """ 86 | # Target dimensoes para cada metade 87 | target_w = 1080 88 | target_h = 960 89 | 90 | # Se não temos 2 faces, fallback (segurança) 91 | if len(face_positions) < 2: 92 | return np.zeros((1920, 1080, 3), dtype=np.uint8) 93 | 94 | # Primeiro rosto (Topo) 95 | face1_img = crop_and_maintain_ar(frame, face_positions[0], target_w, target_h, zoom_out_factor) 96 | 97 | # Segundo rosto (Embaixo) 98 | face2_img = crop_and_maintain_ar(frame, face_positions[1], target_w, target_h, zoom_out_factor) 99 | 100 | # Compor imagem final (Stack Vertical) 101 | result_frame = np.vstack((face1_img, face2_img)) 102 | 103 | return result_frame 104 | 105 | 106 | def detect_face_or_body_two_faces(frame, face_detection, face_mesh, pose): 107 | # Converter a imagem para RGB 108 | frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 109 | 110 | # Processar a detecção de rosto 111 | results_face_detection = face_detection.process(frame_rgb) 112 | results_face_mesh = face_mesh.process(frame_rgb) 113 | results_pose = pose.process(frame_rgb) 114 | 115 | face_positions_detection = [] 116 | if results_face_detection.detections: 117 | for detection in results_face_detection.detections[:2]: 118 | bbox = detection.location_data.relative_bounding_box 119 | x_min = int(bbox.xmin * frame.shape[1]) 120 | y_min = int(bbox.ymin * frame.shape[0]) 121 | width = int(bbox.width * frame.shape[1]) 122 | height = int(bbox.height * frame.shape[0]) 123 | face_positions_detection.append((x_min, y_min, width, height)) 124 | 125 | if len(face_positions_detection) == 2: 126 | return face_positions_detection 127 | 128 | face_positions_mesh = [] 129 | if results_face_mesh.multi_face_landmarks: 130 | for landmarks in results_face_mesh.multi_face_landmarks[:2]: 131 | x_coords = [int(landmark.x * frame.shape[1]) for landmark in landmarks.landmark] 132 | y_coords = [int(landmark.y * frame.shape[0]) for landmark in landmarks.landmark] 133 | x_min, x_max = min(x_coords), max(x_coords) 134 | y_min, y_max = min(y_coords), max(y_coords) 135 | width = x_max - x_min 136 | height = y_max - y_min 137 | face_positions_mesh.append((x_min, y_min, width, height)) 138 | 139 | if len(face_positions_mesh) == 2: 140 | return face_positions_mesh 141 | 142 | # If neither found 2, return what we found (prefer detection as it is bounding box optimized) 143 | if face_positions_detection: 144 | return face_positions_detection 145 | if face_positions_mesh: 146 | return face_positions_mesh 147 | 148 | # Se nenhum rosto for detectado, usar a pose para estimar o corpo 149 | if results_pose.pose_landmarks: 150 | x_coords = [lmk.x for lmk in results_pose.pose_landmarks.landmark] 151 | y_coords = [lmk.y for lmk in results_pose.pose_landmarks.landmark] 152 | x_min = int(min(x_coords) * frame.shape[1]) 153 | x_max = int(max(x_coords) * frame.shape[1]) 154 | y_min = int(min(y_coords) * frame.shape[0]) 155 | y_max = int(max(y_coords) * frame.shape[0]) 156 | width = x_max - x_min 157 | height = y_max - y_min 158 | return [(x_min, y_min, width, height)] 159 | 160 | return None 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ViralCutter: Gerador de Vídeos Virais 2 | [ ![](https://dcbadge.vercel.app/api/server/aihubbrasil) ](https://discord.gg/aihubbrasil)
3 | [English](https://github.com/RafaelGodoyEbert/ViralCutter/blob/main/README_en.md) | [Português](https://github.com/RafaelGodoyEbert/ViralCutter/blob/main/README.md) 4 | 5 | ## **Descrição** 6 | ViralCutter é uma ferramenta inovadora para gerar vídeos virais a partir de conteúdo existente. Com técnicas avançadas de processamento de vídeo e áudio, o ViralCutter corta e edita segmentos de vídeo que são perfeitos para compartilhamento em redes sociais. Utilizando o modelo WhisperX para transcrição e geração de legendas automáticas, ele adapta os vídeos para o formato 9:16 (vertical), ideal para plataformas como TikTok e Instagram com Reels e Youtube com Shorts. 7 | 8 | ## **Novidades e Atualizações (Changelog)** 9 | 10 | Confira as melhorias mais recentes: 11 | 12 | - **Otimização de Performance**: "Slicing" de transcrição implementado. O vídeo é transcrito apenas uma vez, e os cortes reutilizam os dados, eliminando reprocessamento. 13 | - **Suporte a IA Flexível**: Integração nativa com **Gemini API** e suporte experimental ao **G4F** (GPT-4 Free), além de modo Manual. 14 | - **Configuração Externa**: Arquivos `api_config.json` e `prompt.txt` para fácil personalização sem mexer no código. 15 | - **Correção de Rostos**: Fix no MediaPipe para rastreamento de rostos preciso sem depender de "Center Crop". 16 | - **Melhorias em Legendas**: Posicionamento inteligente para vídeos com 2 faces (split screen) e correções de estilo. 17 | 18 | *(Veja o [changelog.md](changelog.md) para detalhes completos)* 19 | 20 | ## **Funcionalidades** 21 | 22 | - **Download de Vídeos**: Baixa vídeos do YouTube através de uma URL fornecida. 23 | - **Transcrição de Áudio**: Converte áudio em texto utilizando o modelo WhisperX. 24 | - **Identificação de Segmentos Virais**: Utiliza IA para detectar partes do vídeo com alto potencial de viralização. 25 | - **Corte e Ajuste de Formato**: Corta os segmentos selecionados e ajusta a proporção para 9:16. 26 | - **Recorte Inteligente**: Mantém o falante em foco (Face Tracking) ou utiliza modo Split Screen (2-Faces) automático. 27 | - **Mesclagem de Áudio e Vídeo**: Combina o áudio transcrito com os clipes de vídeo processados. 28 | - **Exportação em Lote**: Gera um arquivo ZIP com todos os vídeos virais criados, facilitando o download e compartilhamento. 29 | - **Legenda personalizada**: Você cria uma legenda personalizada com cores, highlight, sem highlight ou palavra por palavra, tendo uma ampla possibilidade de edição. 30 | 31 | 32 | ## **Como Usar** 33 | 37 | 38 | - Entre no link e siga os passos na ordem: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1UZKzeqjIeEyvq9nPx7s_4mU6xlkZQn_R?usp=sharing#scrollTo=pa36OeArowme)
39 | 40 | ## **Limitações** 41 | 42 | - A qualidade dos vídeos gerados pode variar com base na qualidade do vídeo original. 43 | - O tempo de processamento depende fortemente da sua GPU. 44 | - O modelo **G4F** pode ser instável ou ter limites de requisição. Use **Gemini** para maior estabilidade (requer api_key). 45 | 46 | ## Inspiração 47 | Este projeto foi inspirado nos seguintes repositórios: 48 | 49 | * [Reels Clips Automator](https://github.com/eddieoz/reels-clips-automator) 50 | * [YoutubeVideoToAIPoweredShorts](https://github.com/Fitsbit/YoutubeVideoToAIPoweredShorts) 51 | 52 | ## TODO📝 53 | - [x] Release code 54 | - [ ] Huggingface SpaceDemo 55 | - [x] Two face in the cut 56 | - [x] Custom caption and burn 57 | - [x] Make the code faster 58 | - [ ] More types of framing beyond 9:16 59 | - [x] The cut follows the face as it moves 60 | - [ ] Automatic translation 61 | - [ ] Satisfactory video on the side 62 | - [ ] Background music 63 | - [ ] Watermark at user's choice 64 | - [ ] Upload directly to YouTube channel 65 | 66 | ## Exemplos 67 | ### Exemplo de vídeo viral ``com highlight ativo`` [comprimido pra caber no GitHub] 68 | https://github.com/user-attachments/assets/dd9a7039-e0f3-427a-a6e1-f50ab5029082 69 | 70 | ### Exemplo Opus Clip vs ViralCutter [comprimido pra caber no GitHub] 71 | https://github.com/user-attachments/assets/12916792-dc0e-4f63-a76b-5698946f50f4 72 | 73 | ### Exemplo 2 faces [comprimido pra caber no GitHub] 74 | https://github.com/user-attachments/assets/ca7ebb9c-52ba-4171-a513-625bef690a2b 75 | 76 | ## **Instalação e Uso Local** 77 | 78 | ### Pré-requisitos 79 | - Python 3.10+ 80 | - FFmpeg instalado e no PATH do sistema. 81 | - GPU NVIDIA recomendada (com CUDA instalado) para o WhisperX. 82 | 83 | ### Configuração 84 | 1. **Instale as dependências**: 85 | ```bash 86 | pip install -r requirements.txt 87 | ``` 88 | *(Nota: WhisperX e Torch podem exigir instalação específica para sua versão de CUDA)*. 89 | 90 | 2. **Configure a API (Opcional, mas Recomendado)**: 91 | Edite o arquivo `api_config.json` na pasta raiz: 92 | ```json 93 | { 94 | "selected_api": "gemini", 95 | "gemini": { 96 | "api_key": "SUA_CHAVE_AQUI" 97 | } 98 | } 99 | ``` 100 | 101 | ### Executando 102 | 103 | #### Modo Interativo (Simples) 104 | Basta rodar o script e seguir as instruções na tela: 105 | ```bash 106 | python main_improved.py 107 | ``` 108 | 109 | #### Modo CLI (Avançado) 110 | Você pode passar todos os argumentos via linha de comando para automação: 111 | 112 | ```bash 113 | python main_improved.py --url "https://youtu.be/EXEMPLO" --segments 3 --ai-backend gemini --model large-v3-turbo 114 | ``` 115 | 116 | **Argumentos Principais:** 117 | - `--url`: URL do vídeo do YouTube. 118 | - `--segments`: Número de cortes a gerar. 119 | - `--ai-backend`: `gemini` (Recomendado), `g4f` ou `manual`. 120 | - `--viral`: Ativa modo de busca viral automática. 121 | - `--face-mode`: `auto`, `1` (um rosto) ou `2` (dois rostos/split). 122 | - `--workflow`: `1` (Completo) ou `2` (Apenas Corte, sem legendas/crop). 123 | 124 | --- 125 | 126 | ## **Contribuições** 127 | Quer ajudar a tornar o ViralCutter ainda melhor? Se você tiver sugestões ou quiser contribuir com o código, fique à vontade para abrir uma issue ou enviar um pull request no nosso repositório do GitHub. 128 | 129 | ## **Versão** 130 | `0.7v Alpha` 131 | Uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`.
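## **Formato dos Segmentos (Modo `manual`)**

No backend `manual`, o script salva o prompt gerado em `prompt.txt` dentro da pasta do projeto e espera que você cole a resposta da IA no terminal (ou a salve em `response.json` e digite `file`). A resposta precisa ser um JSON no formato consumido por `create_viral_segments.py` e `cut_segments.py`. Esboço mínimo, com valores puramente ilustrativos (tempos em segundos como ponto flutuante; `cut_segments.py` interpreta `start_time` inteiro como milissegundos):
```json
{
  "segments": [
    {
      "title": "Título sugerido para o corte",
      "start_time": 125.0,
      "end_time": 170.0,
      "description": "Resumo do trecho",
      "duration": 45.0,
      "score": 87
    }
  ]
}
```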
132 | 133 | --- -------------------------------------------------------------------------------- /scripts/transcribe_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import time 5 | import whisperx 6 | import gc 7 | from i18n.i18n import I18nAuto 8 | 9 | i18n = I18nAuto() 10 | 11 | def apply_safe_globals_hack(): 12 | """ 13 | Workaround for 'Weights only load failed' error in newer PyTorch versions. 14 | We first try to add safe globals. If that's not enough/fails, we monkeypatch torch.load. 15 | """ 16 | try: 17 | import omegaconf 18 | if hasattr(torch.serialization, 'add_safe_globals'): 19 | torch.serialization.add_safe_globals([ 20 | omegaconf.listconfig.ListConfig, 21 | omegaconf.dictconfig.DictConfig, 22 | omegaconf.base.ContainerMetadata, 23 | omegaconf.base.Node 24 | ]) 25 | print("Aplicado patch de segurança para globals do Omegaconf.") 26 | 27 | # Monkeypatch agressivo para garantir compatibilidade com Pyannote/WhisperX antigos 28 | # Motivo: O pyannote carrega muitos checkpoints antigos que não são compatíveis com weights_only=True 29 | # Forçamos False incondicionalmente, ignorando o que for passado. 30 | original_load = torch.load 31 | 32 | def safe_load(*args, **kwargs): 33 | kwargs['weights_only'] = False 34 | return original_load(*args, **kwargs) 35 | 36 | torch.load = safe_load 37 | print("Aplicado monkeypatch em torch.load para forçar weights_only=False.") 38 | 39 | except ImportError: 40 | pass 41 | except Exception as e: 42 | print(f"Aviso ao tentar aplicar patch de globals: {e}") 43 | 44 | def transcribe(input_file, model_name='large-v3', project_folder='tmp'): 45 | print(i18n(f"Iniciando transcrição de {input_file}...")) 46 | 47 | # Diagnóstico de Ambiente 48 | print(f"DEBUG: Python: {sys.executable}") 49 | print(f"DEBUG: Torch: {torch.__version__}") 50 | 51 | start_time = time.time() 52 | 53 | # Se project_folder for None, tenta inferir do input_file ou usa tmp 54 | if project_folder is None: 55 | project_folder = os.path.dirname(input_file) 56 | if not project_folder: 57 | project_folder = 'tmp' 58 | 59 | output_folder = project_folder 60 | os.makedirs(output_folder, exist_ok=True) 61 | 62 | # O input_file pode ser absoluto, então basename está correto 63 | base_name = os.path.splitext(os.path.basename(input_file))[0] 64 | srt_file = os.path.join(output_folder, f"{base_name}.srt") 65 | tsv_file = os.path.join(output_folder, f"{base_name}.tsv") 66 | json_file = os.path.join(output_folder, f"{base_name}.json") 67 | 68 | # Verifica se os arquivos já existem 69 | if os.path.exists(srt_file) and os.path.exists(tsv_file) and os.path.exists(json_file): 70 | print(f"Os arquivos SRT, TSV e JSON já existem. Pulando a transcrição.") 71 | return srt_file, tsv_file 72 | 73 | # ... (Configuração e Transcrição) ... 74 | 75 | # Configuração de Dispositivo 76 | # Se CUDA estiver disponível no ambiente ATUAL, usamos. 77 | # Forçamos uma nova verificação limpa. 78 | device = "cuda" if torch.cuda.is_available() else "cpu" 79 | print(f"DEBUG: Usando dispositivo: {device}") 80 | 81 | # Parâmetros de computação 82 | # float16 é melhor pra GPU, mas se der erro podemos fallback pra int8 ou float32 83 | compute_type = "float16" if device == "cuda" else "float32" 84 | 85 | try: 86 | # Patch para erro de pickle/unpickle se necessário 87 | apply_safe_globals_hack() 88 | 89 | # 1. 
Carregar Modelo 90 | print(f"Carregando modelo {model_name}...") 91 | model = whisperx.load_model( 92 | model_name, 93 | device, 94 | compute_type=compute_type, 95 | asr_options={ 96 | "hotwords": None, 97 | } 98 | ) 99 | 100 | # 2. Carregar Áudio 101 | print(f"Carregando áudio: {input_file}") 102 | audio = whisperx.load_audio(input_file) 103 | 104 | # 3. Transcrever 105 | print("Realizando transcrição (WhisperX)...") 106 | result = model.transcribe( 107 | audio, 108 | batch_size=16, # Batch size ajustável 109 | chunk_size=10 110 | ) 111 | 112 | # 3.5 Alinhar (Critical for word-level timestamps) 113 | print("Alinhando transcrição para obter timestamps precisos...") 114 | try: 115 | detected_language = result["language"] 116 | model_a, metadata = whisperx.load_align_model(language_code=detected_language, device=device) 117 | result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False) 118 | 119 | # Restaurar a chave 'language' que o align remove (necessária para os writers) 120 | result["language"] = detected_language 121 | 122 | # Limpar modelo de alinhamento da memória 123 | if device == "cuda": 124 | del model_a 125 | torch.cuda.empty_cache() 126 | 127 | except Exception as e: 128 | print(f"Erro durante alinhamento: {e}. Continuando com transcrição bruta (pode afetar legendas dinâmicas).") 129 | 130 | # 4. Salvar Resultados 131 | print("Salvando resultados...") 132 | 133 | # WhisperX retorna um dicionário com 'segments'. 134 | # Precisamos converter para o formato que a ferramenta 'whisperx' CLI salva, 135 | # ou usar as funções de writer do próprio whisperx se disponíveis publicamente. 136 | # O whisperx.utils.get_writer é o caminho correto. 137 | 138 | from whisperx.utils import get_writer 139 | 140 | # Cria writers para SRT e TSV 141 | # O argumento 'output_dir' define onde salvar 142 | save_options = { 143 | "highlight_words": False, 144 | "max_line_count": None, 145 | "max_line_width": None 146 | } 147 | 148 | # Escreve SRT 149 | writer_srt = get_writer("srt", output_folder) 150 | writer_srt(result, input_file, save_options) 151 | 152 | # Escreve TSV 153 | writer_tsv = get_writer("tsv", output_folder) 154 | writer_tsv(result, input_file, save_options) 155 | 156 | # Escreve JSON (Novo) 157 | writer_json = get_writer("json", output_folder) 158 | writer_json(result, input_file, save_options) 159 | 160 | # Limpeza de memória VRAM 161 | if device == "cuda": 162 | del model 163 | gc.collect() 164 | torch.cuda.empty_cache() 165 | 166 | end_time = time.time() 167 | elapsed_time = end_time - start_time 168 | minutes = int(elapsed_time // 60) 169 | seconds = int(elapsed_time % 60) 170 | 171 | print(f"Transcrição concluída em {minutes}m {seconds}s.") 172 | 173 | except Exception as e: 174 | print(f"ERRO CRÍTICO na transcrição: {e}") 175 | import traceback 176 | traceback.print_exc() 177 | raise 178 | 179 | # Verificação Final 180 | if not os.path.exists(srt_file): 181 | print(f"AVISO: Arquivo SRT {srt_file} não encontrado após execução.") 182 | 183 | return srt_file, tsv_file 184 | -------------------------------------------------------------------------------- /scripts/cut_segments.py: -------------------------------------------------------------------------------- 1 | from scripts import cut_json 2 | import os 3 | import subprocess 4 | import json 5 | 6 | def cut(segments, project_folder="tmp", skip_video=False): 7 | 8 | def check_nvenc_support(): 9 | # ... 
(unchanged) 10 | try: 11 | result = subprocess.run(["ffmpeg", "-encoders"], capture_output=True, text=True) 12 | return "h264_nvenc" in result.stdout 13 | except subprocess.CalledProcessError: 14 | return False 15 | 16 | def generate_segments(response, project_folder, skip_video): 17 | if not check_nvenc_support(): 18 | print("NVENC is not supported on this system. Falling back to libx264.") 19 | video_codec = "libx264" 20 | else: 21 | video_codec = "h264_nvenc" 22 | 23 | # Procurar input_video.mp4 no project_folder ou tmp 24 | input_file = os.path.join(project_folder, "input.mp4") 25 | if not os.path.exists(input_file): 26 | # Tenta fallback legado 27 | input_file_legacy = os.path.join(project_folder, "input_video.mp4") 28 | if os.path.exists(input_file_legacy): 29 | input_file = input_file_legacy 30 | else: 31 | print(f"Input file not found in {project_folder}") 32 | return 33 | 34 | # Pasta de saida para os cortes 35 | cuts_folder = os.path.join(project_folder, "cuts") 36 | os.makedirs(cuts_folder, exist_ok=True) 37 | 38 | # Pasta de saida para legendas json cortadas 39 | subs_folder = os.path.join(project_folder, "subs") 40 | os.makedirs(subs_folder, exist_ok=True) 41 | 42 | # Input JSON (Transkription original) 43 | input_json_path = os.path.join(project_folder, "input.json") 44 | 45 | segments = response.get("segments", []) 46 | for i, segment in enumerate(segments): 47 | start_time = segment.get("start_time", "00:00:00") 48 | duration = segment.get("duration", 0) 49 | 50 | # Heurística para duration: 51 | if isinstance(duration, (int, float)): 52 | if duration < 1000: 53 | duration_seconds = float(duration) 54 | else: 55 | duration_seconds = duration / 1000.0 56 | duration_str = f"{duration_seconds:.3f}" 57 | else: 58 | # Tenta converter string (HH:MM:SS ou float str) 59 | try: 60 | duration_seconds = float(duration) 61 | duration_str = f"{duration_seconds:.3f}" 62 | except ValueError: 63 | # Assumindo formato hh:mm:ss se nao for float 64 | # Implementar parser se necessario, mas assumindo float por enquanto baseado no historico 65 | duration_seconds = 0 66 | duration_str = duration 67 | 68 | # Heurística para start_time: 69 | if isinstance(start_time, (int, float)): 70 | if start_time > 10000: # Se for milisegundos grandes? Assumindo segundos ou HHMMSS? 71 | # O código original: if start_time int -> start_time/1000.0. 
72 | # Vamos manter a lógica original: int -> milisegundos 73 | pass 74 | 75 | # Refazendo a logica original exata para seguranca e capturando o float: 76 | if isinstance(start_time, int): 77 | start_time_seconds = start_time / 1000.0 78 | start_time_str = f"{start_time_seconds:.3f}" 79 | elif isinstance(start_time, float): 80 | start_time_seconds = start_time 81 | start_time_str = f"{start_time_seconds:.3f}" 82 | else: 83 | # String "00:00:00" ou "12.34" 84 | try: 85 | start_time_seconds = float(start_time) 86 | start_time_str = f"{start_time_seconds:.3f}" 87 | except: 88 | # Se for HH:MM:SS, ffmpeg aceita, mas precisamos converter para float para o json cutter 89 | # Função auxiliar simples 90 | h, m, s = str(start_time).split(':') 91 | start_time_seconds = int(h) * 3600 + int(m) * 60 + float(s) 92 | start_time_str = str(start_time) 93 | 94 | output_filename = f"output{str(i).zfill(3)}_original_scale.mp4" 95 | output_path = os.path.join(cuts_folder, output_filename) 96 | 97 | print(f"Processing segment {i+1}/{len(segments)}") 98 | print(f"Start time: {start_time}, Duration: {duration}") 99 | # print(f"Executing command: {' '.join(command)}") 100 | 101 | # VIDEO GENERATION 102 | if not skip_video: 103 | # Comando ffmpeg 104 | command = [ 105 | "ffmpeg", 106 | "-y", 107 | "-loglevel", "error", "-hide_banner", 108 | "-ss", start_time_str, 109 | "-i", input_file, 110 | "-t", duration_str, 111 | "-c:v", video_codec 112 | ] 113 | 114 | if video_codec == "h264_nvenc": 115 | command.extend([ 116 | "-preset", "p1", 117 | "-b:v", "5M", 118 | ]) 119 | else: 120 | command.extend([ 121 | "-preset", "ultrafast", 122 | "-crf", "23" 123 | ]) 124 | 125 | command.extend([ 126 | "-c:a", "aac", 127 | "-b:a", "128k", 128 | output_path 129 | ]) 130 | 131 | try: 132 | subprocess.run(command, check=True, capture_output=True, text=True) 133 | if os.path.exists(output_path): 134 | file_size = os.path.getsize(output_path) 135 | print(f"Generated segment: {output_filename}, Size: {file_size} bytes") 136 | except subprocess.CalledProcessError as e: 137 | print(f"Error executing ffmpeg: {e}") 138 | else: 139 | print(f"Skipping video generation for {output_filename} (using existing). 
check json...") 140 | 141 | # --- JSON CUTTING (ALWAYS RUN) --- 142 | end_time_seconds = start_time_seconds + float(duration_seconds) 143 | 144 | # Nome do json correspondente ao vídeo FINAL que será gerado (final-outputXXX) 145 | json_output_filename = f"final-output{str(i).zfill(3)}_processed.json" 146 | json_output_path = os.path.join(subs_folder, json_output_filename) 147 | 148 | cut_json.cut_json_transcript(input_json_path, json_output_path, start_time_seconds, end_time_seconds) 149 | # -------------------- 150 | 151 | print("\n" + "="*50 + "\n") 152 | 153 | # Reading the JSON file if segments not provided (legacy behavior) 154 | if segments is None: 155 | json_path = os.path.join(project_folder, 'viral_segments.txt') 156 | with open(json_path, 'r', encoding='utf-8') as file: 157 | response = json.load(file) 158 | else: 159 | response = segments 160 | 161 | generate_segments(response, project_folder, skip_video) 162 | -------------------------------------------------------------------------------- /scripts/adjust_subtitles.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import os 4 | 5 | def adjust(base_color, base_size, highlight_size, highlight_color, words_per_block, gap_limit, mode, vertical_position, alignment, font, outline_color, shadow_color, bold, italic, underline, strikeout, border_style, outline_thickness, shadow_size, project_folder="tmp"): 6 | def generate_ass(json_data, output_file, base_color=base_color, base_size=base_size, highlight_size=highlight_size, highlight_color=highlight_color, words_per_block=words_per_block, gap_limit=gap_limit, mode=mode, vertical_position=vertical_position, alignment=alignment, font=font, outline_color=outline_color, shadow_color=shadow_color, bold=bold, italic=italic, underline=underline, strikeout=strikeout, border_style=border_style, outline_thickness=outline_thickness, shadow_size=shadow_size, timeline_data=None): 7 | header_ass = f"""[Script Info] 8 | Title: Dynamic Subtitles 9 | ScriptType: v4.00+ 10 | PlayDepth: 0 11 | 12 | [V4+ Styles] 13 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding 14 | Style: Default,{font},{base_size},{base_color},&H00000000,{outline_color},{shadow_color},{bold},{italic},{underline},{strikeout},100,100,0,0,{border_style},{outline_thickness},{shadow_size},{alignment},-2,-2,{vertical_position},1 15 | 16 | [Events] 17 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 18 | """ 19 | # Style: Default,{font},{base_size},{base_color},&H00000000,{outline_color},{shadow_color},{bold},{italic},{underline},{strikeout},100,100,0,0,1,1.5,0,{alignment},-2,-2,{vertical_position},1 20 | 21 | # 1. **Name**: `Default` - Style name. 22 | # 2. **Fontname**: `{font}` - Font name used. 23 | # 3. **Fontsize**: `{base_size}` - Font size. 24 | # 4. **PrimaryColour**: `{base_color}` - Primary text color. 25 | # 5. **SecondaryColour**: `&H00000000` - Secondary text color (used for karaoke). 26 | # 6. **OutlineColour**: `{outline_color}` - Text outline color. 27 | # 7. **BackColour**: `{shadow_color}` - Text background/shadow color. 28 | # 8. **Bold**: `{bold}` - Bold (1 to enable, 0 to disable). 29 | # 9. **Italic**: `{italic}` - Italic (1 to enable, 0 to disable). 30 | # 10. **Underline**: `{underline}` - Underline (1 to enable, 0 to disable). 
31 | # 11. **StrikeOut**: `{strikeout}` - Strikeout (1 to enable, 0 to disable). 32 | 33 | # 12. **ScaleX**: `100` - Horizontal text scale (percentage). 34 | # 13. **ScaleY**: `100` - Vertical text scale (percentage). 35 | # 14. **Spacing**: `0` - Character spacing. 36 | # 15. **Angle**: `0` - Text rotation angle. 37 | 38 | # 16. **BorderStyle**: `{border_style}` - Border style (1 for outline, 3 for box). 39 | # 17. **Outline**: `{outline_thickness}` - Outline thickness. 40 | # 18. **Shadow**: `{shadow_size}` - Shadow size. 41 | # 19. **Alignment**: `{alignment}` - Text alignment (1=bottom left, 2=bottom center, 3=bottom right, etc.) 42 | 43 | # 20. **MarginL**: `-2` - Left margin. 44 | # 21. **MarginR**: `-2` - Right margin. 45 | # 22. **MarginV**: `60` - Vertical margin. 46 | # 23. **Encoding**: `1` - Font encoding. 47 | 48 | with open(output_file, "w", encoding="utf-8") as f: 49 | f.write(header_ass) 50 | 51 | last_end_time = 0.0 52 | 53 | for segment in json_data.get('segments', []): 54 | words = segment.get('words', []) 55 | total_words = len(words) 56 | 57 | i = 0 58 | while i < total_words: 59 | block = [] 60 | while len(block) < words_per_block and i < total_words: 61 | current_word = words[i] 62 | if 'word' in current_word: 63 | cleaned_word = re.sub(r'[.,!?;]', '', current_word['word']) 64 | block.append({**current_word, 'word': cleaned_word}) 65 | 66 | if i + 1 < total_words: 67 | next_word = words[i + 1] 68 | if 'start' not in next_word or 'end' not in next_word: 69 | next_cleaned_word = re.sub(r'[.,!?;]', '', next_word['word']) 70 | block[-1]['word'] += " " + next_cleaned_word 71 | i += 1 72 | i += 1 73 | 74 | start_times = [word.get('start', 0) for word in block] 75 | end_times = [word.get('end', 0) for word in block] 76 | 77 | for j in range(len(block)): 78 | start_sec = start_times[j] 79 | end_sec = end_times[j] 80 | 81 | # Prevent overlap and close gaps 82 | if start_sec - last_end_time < gap_limit: 83 | start_sec = last_end_time 84 | 85 | # Ensure valid duration 86 | if end_sec < start_sec: 87 | end_sec = start_sec 88 | 89 | start_time_ass = format_time_ass(start_sec) 90 | end_time_ass = format_time_ass(end_sec) 91 | 92 | last_end_time = end_sec 93 | 94 | line = "" 95 | if mode == "highlight": 96 | for k, word_data in enumerate(block): 97 | word = word_data['word'] 98 | if k == j: 99 | line += f"{{\\fs{highlight_size}\\c{highlight_color}}}{word} " 100 | else: 101 | line += f"{{\\fs{base_size}\\c{base_color}}}{word} " 102 | line = line.strip() 103 | 104 | elif mode == "sem_higlight": 105 | line = " ".join(word_data['word'] for word_data in block).strip() 106 | 107 | elif mode == "palavra_por_palavra": 108 | line = block[j]['word'].strip() 109 | 110 | # Check dynamic timeline for this specific time 111 | pos_tag = "" 112 | 113 | if timeline_data: 114 | # Verify if middle of subtitle is in a '2' mode segment 115 | mid_time = (start_sec + end_sec) / 2 116 | found_mode = "1" 117 | for seg in timeline_data: 118 | if seg['start'] <= mid_time <= seg['end']: 119 | found_mode = seg['mode'] 120 | break 121 | 122 | if found_mode == "2": 123 | # Force Center 124 | x_pos = 1080 // 2 125 | y_pos = 1920 // 2 126 | current_line_alignment = 5 # Center 127 | 128 | # Apply Override Tags: {\anX\pos(X,Y)} 129 | pos_tag = f"{{\\an{current_line_alignment}\\pos({x_pos},{y_pos})}}" 130 | final_line = f"{pos_tag}{line}" 131 | else: 132 | # Mode 1: Respect User Config (Standard Style) 133 | final_line = line 134 | else: 135 | final_line = line 136 | 137 | f.write(f"Dialogue: 
0,{start_time_ass},{end_time_ass},Default,,0,0,0,,{final_line}\n") 138 | 139 | def format_time_ass(time_seconds): 140 | hours = int(time_seconds // 3600) 141 | minutes = int((time_seconds % 3600) // 60) 142 | seconds = int(time_seconds % 60) 143 | centiseconds = int((time_seconds % 1) * 100) 144 | return f"{hours:01}:{minutes:02}:{seconds:02}.{centiseconds:02}" 145 | 146 | # Input and Output Directories 147 | input_dir = os.path.join(project_folder, "subs") 148 | output_dir = os.path.join(project_folder, "subs_ass") 149 | 150 | # Create output directory if it doesn't exist 151 | os.makedirs(output_dir, exist_ok=True) 152 | 153 | # Load face modes if available 154 | face_modes = {} 155 | modes_file = os.path.join(project_folder, "face_modes.json") 156 | if os.path.exists(modes_file): 157 | try: 158 | with open(modes_file, "r") as f: 159 | face_modes = json.load(f) 160 | print("Loaded face modes for dynamic subtitle positioning.") 161 | except Exception as e: 162 | print(f"Could not load face modes: {e}") 163 | 164 | # Process all JSON files in input directory 165 | for filename in os.listdir(input_dir): 166 | if filename.endswith(".json"): 167 | input_path = os.path.join(input_dir, filename) 168 | output_filename = os.path.splitext(filename)[0] + ".ass" 169 | output_path = os.path.join(output_dir, output_filename) 170 | 171 | # Look for timeline file 172 | # filename is "final-outputXXX_processed.json" 173 | # timeline is "final-outputXXX_timeline.json" ? No, output_file was in 'final' folder 174 | # edit_video: timeline_file = output_file.replace(".mp4", "_timeline.json") 175 | # output_file was "final/temp_video_no_audio_{index}.mp4" -> "final/temp_video_no_audio_{index}_timeline.json" 176 | 177 | # We need to map filename to index to find timeline 178 | # Current filename: "final-output000_processed.json" 179 | match = re.search(r"output(\d+)", filename) 180 | timeline_data = None 181 | if match: 182 | idx = int(match.group(1)) 183 | # Construct path to timeline 184 | # edit_video saved it in 'final_folder' which is inside project_folder/final 185 | # Pattern: temp_video_no_audio_{index}_timeline.json 186 | timeline_path = os.path.join(project_folder, "final", f"temp_video_no_audio_{idx}_timeline.json") 187 | if os.path.exists(timeline_path): 188 | try: 189 | with open(timeline_path, "r") as tf: 190 | timeline_data = json.load(tf) 191 | print(f" -> Found dynamic timeline for video {idx}") 192 | except: 193 | pass 194 | 195 | # Load JSON file 196 | with open(input_path, "r", encoding="utf-8") as file: 197 | json_data = json.load(file) 198 | 199 | # Determine static alignment (fallback) 200 | base_name = os.path.splitext(filename)[0] 201 | key_match = re.search(r"(output\d+)", base_name) 202 | key = key_match.group(1) if key_match else base_name 203 | 204 | current_alignment = alignment 205 | current_vertical_position = vertical_position 206 | 207 | mode_face = face_modes.get(key) 208 | if mode_face == "2" and not timeline_data: # Only use static if no timeline 209 | current_alignment = 5 210 | current_vertical_position = 0 211 | # print(f" -> Video {base_name}: 2 Faces detected (static). 
Using Center Subtitles.") 212 | 213 | # Generate ASS file with dynamic timeline support 214 | generate_ass(json_data, output_path, mode=mode, words_per_block=words_per_block, 215 | vertical_position=current_vertical_position, alignment=current_alignment, 216 | timeline_data=timeline_data) 217 | 218 | print(f"Processed file: {filename} -> {output_filename}") 219 | 220 | print("All JSON files processed and converted to ASS.") -------------------------------------------------------------------------------- /scripts/create_viral_segments.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import sys  # usado no modo manual para ler a resposta colada via stdin 5 | # Tenta importar bibliotecas de IA opcionalmente 6 | try: 7 | import google.generativeai as genai 8 | HAS_GEMINI = True 9 | except ImportError: 10 | HAS_GEMINI = False 11 | 12 | try: 13 | import g4f 14 | HAS_G4F = True 15 | except ImportError: 16 | HAS_G4F = False 17 | 18 | def clean_json_response(response_text): 19 | """Limpa blocos de código markdown do texto de resposta.""" 20 | if not response_text: 21 | return {"segments": []} 22 | # Remove ```json ... ``` 23 | pattern = r"```json(.*?)```" 24 | match = re.search(pattern, response_text, re.DOTALL) 25 | if match: 26 | response_text = match.group(1) 27 | elif "```" in response_text: 28 | response_text = response_text.replace("```", "") 29 | 30 | return json.loads(response_text.strip()) 31 | 32 | def call_gemini(prompt, api_key, model_name='gemini-2.5-flash-lite-preview-09-2025'): 33 | if not HAS_GEMINI: 34 | raise ImportError("A biblioteca 'google-generativeai' não está instalada. Instale com: pip install google-generativeai") 35 | 36 | genai.configure(api_key=api_key) 37 | # Usando modelo definido na config ou o padrão 38 | model = genai.GenerativeModel(model_name) 39 | 40 | try: 41 | response = model.generate_content(prompt) 42 | return response.text 43 | except Exception as e: 44 | print(f"Erro na API do Gemini: {e}") 45 | return "{}" 46 | 47 | def call_g4f(prompt, model_name="gpt-4o-mini"): 48 | if not HAS_G4F: 49 | raise ImportError("A biblioteca 'g4f' não está instalada. Instale com: pip install g4f") 50 | 51 | try: 52 | # Tenta usar um provider automático 53 | response = g4f.ChatCompletion.create( 54 | model=model_name, 55 | messages=[{"role": "user", "content": prompt}], 56 | ) 57 | return response 58 | except Exception as e: 59 | print(f"Erro na API do G4F: {e}") 60 | return "{}" 61 | 62 | def create(num_segments, viral_mode, themes, tempo_minimo, tempo_maximo, ai_mode="manual", api_key=None, project_folder="tmp"): 63 | quantidade_de_virals = num_segments 64 | 65 | # Ler transcrição 66 | input_tsv = os.path.join(project_folder, 'input.tsv') 67 | input_srt = os.path.join(project_folder, 'input.srt') 68 | 69 | # Fallback pro SRT se TSV não existir 70 | if not os.path.exists(input_tsv): 71 | print(f"Aviso: {input_tsv} não encontrado. 
Tentando ler do SRT raw.") 72 | if os.path.exists(input_srt): 73 | with open(input_srt, 'r', encoding='utf-8') as f: 74 | content = f.read() 75 | else: 76 | raise FileNotFoundError(f"Nenhum arquivo de transcrição encontrado em {project_folder}") 77 | else: 78 | with open(input_tsv, 'r', encoding='utf-8') as f: 79 | content = f.read() 80 | 81 | # Load Config and Prompt 82 | base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 83 | config_path = os.path.join(base_dir, 'api_config.json') 84 | prompt_path = os.path.join(base_dir, 'prompt.txt') 85 | 86 | # Default Config 87 | config = { 88 | "selected_api": "gemini", 89 | "gemini": { 90 | "api_key": "", 91 | "model": "gemini-2.5-flash-lite-preview-09-2025", 92 | "chunk_size": 15000 93 | }, 94 | "g4f": { 95 | "model": "gpt-4o-mini", 96 | "chunk_size": 2000 97 | } 98 | } 99 | 100 | if os.path.exists(config_path): 101 | try: 102 | with open(config_path, 'r', encoding='utf-8') as f: 103 | loaded_config = json.load(f) 104 | # Merge simples (profundidade 1 e 2 apenas para chaves conhecidas) 105 | if "gemini" in loaded_config: config["gemini"].update(loaded_config["gemini"]) 106 | if "g4f" in loaded_config: config["g4f"].update(loaded_config["g4f"]) 107 | if "selected_api" in loaded_config: config["selected_api"] = loaded_config["selected_api"] 108 | except Exception as e: 109 | print(f"Erro ao ler api_config.json: {e}. Usando padrões.") 110 | 111 | # Override ai_mode if specified in config and not manual 112 | # Mas mantemos o ai_mode passado como argumento se ele não for "manual" (assumindo que "manual" é o default se ninguem passou nada, ou se a UI passou) 113 | # Se ai_mode for manual e o user configurou outra coisa no json, podemos usar? 114 | # Melhor respeitar o argumento da função: ai_mode 115 | 116 | # Configurar variaveis baseadas no ai_mode 117 | current_chunk_size = 15000 # default fallback 118 | model_name = "" 119 | 120 | if ai_mode == "gemini": 121 | current_chunk_size = config["gemini"].get("chunk_size", 15000) 122 | model_name = config["gemini"].get("model", "gemini-2.5-flash-lite-preview-09-2025") 123 | if not api_key: # Se não veio por argumento, tenta do config 124 | api_key = config["gemini"].get("api_key", "") 125 | 126 | elif ai_mode == "g4f": 127 | current_chunk_size = config["g4f"].get("chunk_size", 2000) 128 | model_name = config["g4f"].get("model", "gpt-4o-mini") 129 | 130 | system_prompt_template = "" 131 | if os.path.exists(prompt_path): 132 | with open(prompt_path, 'r', encoding='utf-8') as f: 133 | system_prompt_template = f.read() 134 | else: 135 | # Fallback se arquivo nao existir 136 | print("Aviso: prompt.txt não encontrado. Usando prompt interno.") 137 | system_prompt_template = """You are a Viral Segment Identifier. 138 | {context_instruction} 139 | Given the following video transcript chunk, {virality_instruction}. 140 | CONSTRAINTS: 141 | - Each segment duration: {min_duration}s to {max_duration}s. 142 | - Cuts MUST MAKE SENSE contextually. 143 | - RETURN ONLY VALID JSON. 
144 | 145 | TRANSCRIPT CHUNK: 146 | {transcript_chunk} 147 | 148 | OUTPUT FORMAT: 149 | {json_template}""" 150 | 151 | 152 | json_template = ''' 153 | { "segments" : 154 | [ 155 | { 156 | "title": "Suggested Viral Title", 157 | "start_time": number, 158 | "end_time": number, 159 | "description": "Description of the text", 160 | "duration": 0, 161 | "score": 0 # Probability of going viral (0-100) 162 | } 163 | ] 164 | } 165 | ''' 166 | 167 | # Split content into chunks 168 | chunk_size = int(current_chunk_size) 169 | chunks = [] 170 | start = 0 171 | 172 | while start < len(content): 173 | end = min(start + chunk_size, len(content)) 174 | if end < len(content): 175 | end = content.rfind('\n', start, end) 176 | if end == -1: 177 | end = start + chunk_size 178 | chunks.append(content[start:end]) 179 | start = end 180 | 181 | if viral_mode: 182 | virality_instruction = f"""analyze the segment for potential virality and identify {quantidade_de_virals} most viral segments from the transcript""" 183 | else: 184 | virality_instruction = f"""analyze the segment for potential virality and identify {quantidade_de_virals} the best parts based on the list of themes {themes}.""" 185 | 186 | output_texts = [] 187 | for i, chunk in enumerate(chunks): 188 | context_instruction = "" 189 | if len(chunks) > 1: 190 | context_instruction = f"Part {i+1} of {len(chunks)}. " 191 | 192 | # Preencher o template 193 | try: 194 | prompt = system_prompt_template.format( 195 | context_instruction=context_instruction, 196 | virality_instruction=virality_instruction, 197 | min_duration=tempo_minimo, 198 | max_duration=tempo_maximo, 199 | transcript_chunk=chunk, 200 | json_template=json_template, 201 | amount=quantidade_de_virals # Caso o user use {amount} no txt 202 | ) 203 | except KeyError as e: 204 | # Fallback se o user bagunçou o txt e esqueceu chaves ou colocou chaves erradas 205 | # Tenta um replace manual basico ou avisa erro, mas ideal é não quebrar. 
206 | # Vamos usar replace seguro 207 | prompt = system_prompt_template 208 | prompt = prompt.replace("{context_instruction}", context_instruction) 209 | prompt = prompt.replace("{virality_instruction}", virality_instruction) 210 | prompt = prompt.replace("{min_duration}", str(tempo_minimo)) 211 | prompt = prompt.replace("{max_duration}", str(tempo_maximo)) 212 | prompt = prompt.replace("{transcript_chunk}", chunk) 213 | prompt = prompt.replace("{json_template}", json_template) 214 | prompt = prompt.replace("{amount}", str(quantidade_de_virals)) 215 | 216 | output_texts.append(prompt) 217 | 218 | all_segments = [] 219 | 220 | print(f"Processando {len(output_texts)} chunks usando modo: {ai_mode.upper()}") 221 | 222 | for i, prompt in enumerate(output_texts): 223 | response_text = "" 224 | 225 | # Always save prompt to file (Manual, Gemini, or G4F) 226 | manual_prompt_path = os.path.join(project_folder, "prompt.txt") 227 | try: 228 | with open(manual_prompt_path, "w", encoding="utf-8") as f: 229 | f.write(prompt) 230 | except Exception as e: 231 | print(f"[ERRO] Falha ao salvar prompt.txt: {e}") 232 | 233 | if ai_mode == "manual": 234 | print(f"\n[INFO] O prompt foi salvo em: {manual_prompt_path}") 235 | 236 | print("\n" + "="*60) 237 | print(f"CHUNK {i+1}/{len(output_texts)}") 238 | print("="*60) 239 | print("COPIE O PROMPT ABAIXO (OU DO ARQUIVO GERADO) E COLE NA SUA IA PREFERIDA:") 240 | print("-" * 20) 241 | print(prompt) 242 | print("-" * 20) 243 | print("="*60) 244 | print("Cole o JSON de resposta abaixo e pressione ENTER.") 245 | print("Dica: Se o JSON tiver múltiplas linhas, tente colar tudo de uma vez ou minificado.") 246 | print("Se preferir, digite 'file' para ler de um arquivo 'tmp/response.json'.") 247 | 248 | user_input = input("JSON ou 'file': ") 249 | 250 | if user_input.lower() == 'file': 251 | try: 252 | response_json_path = os.path.join(project_folder, 'response.json') 253 | with open(response_json_path, 'r', encoding='utf-8') as rf: 254 | response_text = rf.read() 255 | except FileNotFoundError: 256 | print(f"Arquivo {response_json_path} não encontrado.") 257 | else: 258 | response_text = user_input 259 | # Tenta ler mais linhas se parecer incompleto (bruteforce simples) 260 | if response_text.strip().startswith("{") and not response_text.strip().endswith("}"): 261 | print("Parece incompleto. 
Cole o resto e dê Enter (ou Ctrl+C para cancelar):") 262 | try: 263 | rest = sys.stdin.read() # Isso pode travar no Windows sem EOF explícito 264 | response_text += rest 265 | except: 266 | pass 267 | 268 | elif ai_mode == "gemini": 269 | print(f"Enviando chunk {i+1} para o Gemini (Model: {model_name})...") 270 | response_text = call_gemini(prompt, api_key, model_name=model_name) 271 | 272 | elif ai_mode == "g4f": 273 | print(f"Enviando chunk {i+1} para o G4F (Model: {model_name})...") 274 | response_text = call_g4f(prompt, model_name=model_name) 275 | 276 | # Processar resposta 277 | try: 278 | data = clean_json_response(response_text) 279 | chunk_segments = data.get("segments", []) 280 | print(f"Encontrados {len(chunk_segments)} segmentos neste chunk.") 281 | all_segments.extend(chunk_segments) 282 | except json.JSONDecodeError: 283 | print(f"Erro: Resposta inválida (não é JSON válida).") 284 | print(f"Conteúdo recebido (primeiros 100 chars): {response_text[:100]}...") 285 | except Exception as e: 286 | print(f"Erro desconhecido ao processar chunk: {e}") 287 | 288 | # Retorna o dicionário consolidado 289 | final_result = {"segments": all_segments} 290 | 291 | # Validação básica de duração nos resultados (opcional, mas bom pra evitar erros no ffmpeg) 292 | # Convertendo milliseconds pra int se necessário, garantindo sanidade 293 | validated_segments = [] 294 | for seg in final_result['segments']: 295 | # Garante start_time 296 | if 'start_time' in seg: 297 | # Deixa passar, cut_segments lida com int/str conversion 298 | validated_segments.append(seg) 299 | 300 | final_result['segments'] = validated_segments 301 | 302 | return final_result -------------------------------------------------------------------------------- /main_improved.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | # Suppress unnecessary logs before importing heavy libs 5 | os.environ["ORT_LOGGING_LEVEL"] = "3" 6 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 7 | 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | 11 | import json 12 | import shutil 13 | import subprocess 14 | import argparse 15 | import time 16 | from scripts import ( 17 | download_video, 18 | transcribe_video, 19 | create_viral_segments, 20 | cut_segments, 21 | edit_video, 22 | transcribe_cuts, 23 | adjust_subtitles, 24 | burn_subtitles, 25 | save_json, 26 | organize_output, 27 | ) 28 | from i18n.i18n import I18nAuto 29 | 30 | # Inicializa sistema de tradução 31 | i18n = I18nAuto() 32 | # 33 | # Configurações de Legenda (ASS Style) 34 | # Cores no formato BGR (Blue-Green-Red) para o ASS 35 | COLORS = { 36 | "red": "0000FF", # Red 37 | "yellow": "00FFFF", # Yellow 38 | "green": "00FF00", # Green 39 | "white": "FFFFFF", # White 40 | "black": "000000", # Black 41 | "grey": "808080", # Grey 42 | } 43 | 44 | def get_subtitle_config(config_path=None): 45 | """ 46 | Returns the subtitle configuration dictionary. 47 | Can be expanded to load from a JSON/YAML file in the future. 
48 | """ 49 | # Default Config 50 | base_color_transparency = "00" 51 | outline_transparency = "FF" 52 | highlight_color_transparency = "00" 53 | shadow_color_transparency = "00" 54 | 55 | config = { 56 | "font": "Montserrat-Regular", 57 | "base_size": 12, 58 | "base_color": f"&H{base_color_transparency}{COLORS['white']}&", 59 | "highlight_size": 14, 60 | "words_per_block": 3, 61 | "gap_limit": 0.5, 62 | "mode": 'highlight', # Options: 'no_highlight', 'word_by_word', 'highlight' 63 | "highlight_color": f"&H{highlight_color_transparency}{COLORS['green']}&", 64 | "vertical_position": 60, # 1=170(top), ... 4=60(default) 65 | "alignment": 2, # 2=Center 66 | "bold": 0, 67 | "italic": 0, 68 | "underline": 0, 69 | "strikeout": 0, 70 | "border_style": 2, # 1=outline, 3=box 71 | "outline_thickness": 1.5, 72 | "outline_color": f"&H{outline_transparency}{COLORS['grey']}&", 73 | "shadow_size": 2, 74 | "shadow_color": f"&H{shadow_color_transparency}{COLORS['black']}&", 75 | } 76 | 77 | if config_path and os.path.exists(config_path): 78 | try: 79 | with open(config_path, 'r', encoding='utf-8') as f: 80 | loaded_config = json.load(f) 81 | config.update(loaded_config) 82 | print(f"Loaded subtitle config from {config_path}") 83 | except Exception as e: 84 | print(f"Error loading subtitle config: {e}. Using defaults.") 85 | 86 | return config 87 | 88 | def interactive_input_int(prompt_text): 89 | """Solicita um inteiro ao usuário via terminal.""" 90 | while True: 91 | try: 92 | value = int(input(i18n(prompt_text))) 93 | if value > 0: 94 | return value 95 | print(i18n("\nError: Number must be greater than 0.")) 96 | except ValueError: 97 | print(i18n("\nError: The value you entered is not an integer. Please try again.")) 98 | 99 | def main(): 100 | # Configuração de Argumentos via Linha de Comando (CLI) 101 | parser = argparse.ArgumentParser(description="ViralCutter CLI") 102 | parser.add_argument("--url", help="YouTube Video URL") 103 | parser.add_argument("--segments", type=int, help="Number of segments to create") 104 | parser.add_argument("--viral", action="store_true", help="Enable viral mode") 105 | parser.add_argument("--themes", help="Comma-separated themes (if not viral mode)") 106 | parser.add_argument("--burn-only", action="store_true", help="Skip processing and only burn subtitles") 107 | parser.add_argument("--min-duration", type=int, default=15, help="Minimum segment duration (seconds)") 108 | parser.add_argument("--max-duration", type=int, default=90, help="Maximum segment duration (seconds)") 109 | parser.add_argument("--model", default="large-v3-turbo", help="Whisper model to use") 110 | 111 | parser.add_argument("--ai-backend", choices=["manual", "gemini", "g4f"], help="AI backend for viral analysis") 112 | parser.add_argument("--api-key", help="Gemini API Key (required if ai-backend is gemini)") 113 | 114 | parser.add_argument("--workflow", choices=["1", "2"], default="1", help="Workflow choice: 1=Full, 2=Cut Only") 115 | parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model") 116 | parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2") 117 | parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file") 118 | parser.add_argument("--face-detect-interval", type=str, default="0.17,1.0", help="Face detection interval in seconds. 
Single value or 'interval_1face,interval_2face'") 119 | parser.add_argument("--skip-prompts", action="store_true", help="Skip interactive prompts and use defaults/existing files") 120 | 121 | args = parser.parse_args() 122 | 123 | # Modo Apenas Queimar Legenda 124 | # Verifica o argumento CLI ou uma variável local hardcoded (para compatibilidade) 125 | burn_only_mode = args.burn_only 126 | 127 | if burn_only_mode: 128 | print(i18n("Burn only mode activated. Skipping to subtitle burning...")) 129 | burn_subtitles.burn() 130 | print(i18n("Subtitle burning completed.")) 131 | return 132 | 133 | # Obtenção de Inputs (CLI ou Interativo) 134 | url = args.url 135 | input_video = None 136 | 137 | # Se não temos URL via CLI, pedimos agora 138 | if not url: 139 | if args.skip_prompts: 140 | print(i18n("No URL provided and skipping prompts. Trying to load latest project...")) 141 | # Fallthrough to project loading logic 142 | else: 143 | user_input = input(i18n("Enter the YouTube video URL (or press Enter to use latest project): ")).strip() 144 | if user_input: 145 | url = user_input 146 | 147 | if not url: 148 | # Usuário apertou Enter (Vazio) -> Tentar pegar último projeto 149 | base_virals = "VIRALS" 150 | if os.path.exists(base_virals): 151 | subdirs = [os.path.join(base_virals, d) for d in os.listdir(base_virals) if os.path.isdir(os.path.join(base_virals, d))] 152 | if subdirs: 153 | latest_project = max(subdirs, key=os.path.getmtime) 154 | detected_video = os.path.join(latest_project, "input.mp4") 155 | if os.path.exists(detected_video): 156 | input_video = detected_video 157 | print(i18n("Using latest project: {}").format(latest_project)) 158 | else: 159 | print(i18n("Latest project found but 'input.mp4' is missing.")) 160 | sys.exit(1) 161 | else: 162 | print(i18n("No existing projects found in VIRALS folder.")) 163 | sys.exit(1) 164 | else: 165 | print(i18n("VIRALS folder not found. Cannot load latest project.")) 166 | sys.exit(1) 167 | 168 | # ------------------------------------------------------------------------- 169 | # Checagem Antecipada de Segmentos Virais (Para pular configurações se já existirem) 170 | # ------------------------------------------------------------------------- 171 | viral_segments = None 172 | project_folder_anticipated = None 173 | 174 | if input_video: 175 | # Se já temos o vídeo, podemos deduzir a pasta 176 | project_folder_anticipated = os.path.dirname(input_video) 177 | viral_segments_file = os.path.join(project_folder_anticipated, "viral_segments.txt") 178 | 179 | if os.path.exists(viral_segments_file): 180 | print(i18n("\nExisting viral segments found: {}").format(viral_segments_file)) 181 | if args.skip_prompts: 182 | use_existing_json = 'yes' 183 | else: 184 | use_existing_json = input(i18n("Use existing viral segments? (yes/no) [default: yes]: ")).strip().lower() 185 | 186 | if use_existing_json in ['', 'y', 'yes']: 187 | try: 188 | with open(viral_segments_file, 'r', encoding='utf-8') as f: 189 | viral_segments = json.load(f) 190 | print(i18n("Loaded existing viral segments. 
Skipping configuration prompts.")) 191 | except Exception as e: 192 | print(i18n("Error loading JSON: {}.").format(e)) 193 | 194 | # Variaveis de config de IA (só necessárias se não tivermos os segmentos) 195 | num_segments = None 196 | viral_mode = False 197 | themes = "" 198 | ai_backend = "manual" # default 199 | api_key = None 200 | 201 | if not viral_segments: 202 | num_segments = args.segments 203 | if not num_segments: 204 | if args.skip_prompts: 205 | print("No segments count provided and skip-prompts is ON. Using default 3.") 206 | num_segments = 3 207 | else: 208 | num_segments = interactive_input_int("Enter the number of viral segments to create: ") 209 | 210 | viral_mode = args.viral 211 | if not args.viral and not args.themes: 212 | if args.skip_prompts: 213 | print("Viral mode not set, defaulting to True.") 214 | viral_mode = True 215 | else: 216 | response = input(i18n("Do you want viral mode? (yes/no): ")).lower() 217 | viral_mode = response in ['yes', 'y'] 218 | 219 | themes = args.themes if args.themes else "" 220 | if not viral_mode and not themes: 221 | if not args.skip_prompts: 222 | themes = input(i18n("Enter themes (comma-separated, leave blank if viral mode is True): ")) 223 | 224 | # Duration Config 225 | print(i18n("\nCurrent duration settings: {}s - {}s").format(args.min_duration, args.max_duration)) 226 | if not args.skip_prompts: 227 | change_dur = input(i18n("Change duration? (y/n) [default: n]: ")).strip().lower() 228 | if change_dur in ['y', 'yes']: 229 | try: 230 | min_d = input(i18n("Minimum duration [{}]: ").format(args.min_duration)).strip() 231 | if min_d: args.min_duration = int(min_d) 232 | 233 | max_d = input(i18n("Maximum duration [{}]: ").format(args.max_duration)).strip() 234 | if max_d: args.max_duration = int(max_d) 235 | except ValueError: 236 | print(i18n("Invalid number. Using previous values.")) 237 | 238 | # Load API Config 239 | config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'api_config.json') 240 | api_config = {} 241 | if os.path.exists(config_path): 242 | try: 243 | with open(config_path, 'r', encoding='utf-8') as f: 244 | api_config = json.load(f) 245 | except: 246 | pass 247 | 248 | # Seleção do Backend de IA 249 | ai_backend = args.ai_backend 250 | 251 | # Try to load backend from config if not in args 252 | if not ai_backend and api_config.get("selected_api"): 253 | ai_backend = api_config.get("selected_api") 254 | print(i18n("Using AI Backend from config: {}").format(ai_backend)) 255 | 256 | if not ai_backend: 257 | if args.skip_prompts: 258 | print("No AI backend selected, defaulting to Manual.") 259 | ai_backend = "manual" 260 | else: 261 | print("\n" + i18n("Select AI Backend for Viral Analysis:")) 262 | print(i18n("1. Gemini API (Best / Recommended)")) 263 | print(i18n("2. G4F (Free / Experimental)")) 264 | print(i18n("3. Manual (Copy/Paste Prompt)")) 265 | choice = input(i18n("Choose (1/2/3): ")).strip() 266 | 267 | if choice == "1": 268 | ai_backend = "gemini" 269 | elif choice == "2": 270 | ai_backend = "g4f" 271 | else: 272 | ai_backend = "manual" 273 | 274 | api_key = args.api_key 275 | # Check config for API Key if using Gemini 276 | if ai_backend == "gemini" and not api_key: 277 | cfg_key = api_config.get("gemini", {}).get("api_key", "") 278 | if cfg_key and cfg_key != "SUA_KEY_AQUI": 279 | api_key = cfg_key 280 | 281 | if ai_backend == "gemini" and not api_key: 282 | if args.skip_prompts: 283 | print("Gemini API key missing, but skip-prompts is ON. 
Might fail.") 284 | else: 285 | print(i18n("Gemini API Key not found in api_config.json or arguments.")) 286 | api_key = input(i18n("Enter your Gemini API Key: ")).strip() 287 | 288 | # Workflow & Face Config Inputs 289 | workflow_choice = args.workflow 290 | face_model = args.face_model 291 | face_mode = args.face_mode 292 | 293 | # If args weren't provided and we are not skipping prompts, ask user 294 | # Note: argparse defaults are set, so they "are provided" effectively. 295 | # To truly detect "not provided", request default=None in argparse. 296 | # But for "Simplified Mode", defaults are good. 297 | # Advanced users use params. 298 | # We will assume CLI defaults are what we want if skip_prompts is on. 299 | 300 | if not args.burn_only and not args.skip_prompts: 301 | # Interactive Face Config 302 | print(i18n("\n--- Face Detection Settings ---")) 303 | print(i18n("Current Face Model: {} | Mode: {}").format(face_model, face_mode)) 304 | 305 | detection_intervals = None 306 | if args.face_detect_interval: 307 | try: 308 | parts = args.face_detect_interval.split(',') 309 | if len(parts) == 1: 310 | val = float(parts[0]) 311 | detection_intervals = {'1': val, '2': val} 312 | print(i18n("Custom detection interval set: {}s for both modes").format(val)) 313 | elif len(parts) >= 2: 314 | val1 = float(parts[0]) 315 | val2 = float(parts[1]) 316 | detection_intervals = {'1': val1, '2': val2} 317 | print(i18n("Custom detection interval set: {}s (1-face), {}s (2-face)").format(val1, val2)) 318 | except ValueError: 319 | print(i18n("Invalid format for face-detect-interval. Using defaults.")) 320 | else: 321 | print(i18n("Using dynamic intervals: 1s for 2-face, ~0.16s for 1-face.")) 322 | 323 | 324 | # Pipeline Execution 325 | try: 326 | # 1. Download & Project Setup 327 | print(f"DEBUG: Checking input_video state. input_video={input_video}") 328 | 329 | if not input_video: 330 | if not url: 331 | print(i18n("Error: No URL provided and no existing video selected.")) 332 | sys.exit(1) 333 | 334 | print(i18n("Starting download...")) 335 | download_result = download_video.download(url) 336 | 337 | if isinstance(download_result, tuple): 338 | input_video, project_folder = download_result 339 | else: 340 | input_video = download_result 341 | project_folder = os.path.dirname(input_video) 342 | 343 | print(f"DEBUG: Download finished. input_video={input_video}, project_folder={project_folder}") 344 | 345 | else: 346 | # Reuso de video existente 347 | print("DEBUG: Using existing video logic.") 348 | project_folder = os.path.dirname(input_video) 349 | 350 | print(f"Project Folder: {project_folder}") 351 | 352 | # 2. Transcribe 353 | print(i18n("Transcribing with model {}...").format(args.model)) 354 | # Se skip config, args.model é default 355 | srt_file, tsv_file = transcribe_video.transcribe(input_video, args.model, project_folder=project_folder) 356 | 357 | # 3. Create Viral Segments 358 | # Se não carregamos 'viral_segments' lá em cima (ou se era download novo), checamos agora ou criamos 359 | if not viral_segments: 360 | # Checagem tardia para downloads novos que por acaso ja tenham json (Ex: URL repetida) 361 | viral_segments_file_late = os.path.join(project_folder, "viral_segments.txt") 362 | if os.path.exists(viral_segments_file_late): 363 | # ... Lógica de pergunta tardia se necessário, ou auto-uso? 364 | # Como o usuário já respondeu config, talvez ele queira refazer? 365 | # Para simplificar, se não carregou antes, assume que quer criar (ou perguntamos de novo). 
366 | # Mas para não ficar chato, vamos perguntar só se não foi perguntado antes. 367 | pass 368 | 369 | print(i18n("Creating viral segments using {}...").format(ai_backend.upper())) 370 | viral_segments = create_viral_segments.create( 371 | num_segments, 372 | viral_mode, 373 | themes, 374 | args.min_duration, 375 | args.max_duration, 376 | ai_mode=ai_backend, 377 | api_key=api_key, 378 | project_folder=project_folder 379 | ) 380 | 381 | if not viral_segments or not viral_segments.get("segments"): 382 | print("Aviso: Nenhum segmento viral foi gerado. Verifique a resposta da IA.") 383 | 384 | save_json.save_viral_segments(viral_segments, project_folder=project_folder) 385 | 386 | # 4. Cut Segments 387 | cuts_folder = os.path.join(project_folder, "cuts") 388 | skip_cutting = False 389 | 390 | if os.path.exists(cuts_folder) and os.listdir(cuts_folder): 391 | print(i18n("\nExisting cuts found in: {}").format(cuts_folder)) 392 | if args.skip_prompts: 393 | cut_again_resp = 'no' 394 | else: 395 | cut_again_resp = input(i18n("Cuts already exist. Cut again? (yes/no) [default: no]: ")).strip().lower() 396 | 397 | # Default is no (skip) if they just press enter or say no 398 | if cut_again_resp not in ['y', 'yes']: 399 | skip_cutting = True 400 | 401 | if skip_cutting: 402 | print(i18n("Skipping Video Rendering (using existing cuts), but updating Subtitle JSONs...")) 403 | else: 404 | print(i18n("Cutting segments...")) 405 | 406 | cut_segments.cut(viral_segments, project_folder=project_folder, skip_video=skip_cutting) 407 | 408 | # 5. Workflow Check 409 | if workflow_choice == "2": 410 | print(i18n("Cut Only selected. Skipping Face Crop and Subtitles.")) 411 | print(i18n(f"Process completed! Check your results in: {project_folder}")) 412 | sys.exit(0) 413 | 414 | # 5. Edit Video (Face Crop) 415 | print(i18n("Editing video with {} (Mode: {})...").format(face_model, face_mode)) 416 | edit_video.edit(project_folder=project_folder, face_model=face_model, face_mode=face_mode, detection_period=detection_intervals) 417 | 418 | # 6. Subtitles 419 | burn_subtitles_option = True 420 | if burn_subtitles_option: 421 | print(i18n("Processing subtitles...")) 422 | # transcribe_cuts removido: JSON de legenda já é gerado no corte 423 | # transcribe_cuts.transcribe(project_folder=project_folder) 424 | 425 | sub_config = get_subtitle_config(args.subtitle_config) 426 | 427 | 428 | 429 | # Passa o dicionário desempacotado como argumentos, mais o project_folder 430 | adjust_subtitles.adjust(project_folder=project_folder, **sub_config) 431 | 432 | burn_subtitles.burn(project_folder=project_folder) 433 | else: 434 | print(i18n("Subtitle burning skipped.")) 435 | 436 | # Organização Final (Opcional, pois agora já está tudo em project_folder) 437 | # organize_output.organize(project_folder=project_folder) 438 | 439 | print(i18n("Process completed! 
Check your results in: {}").format(project_folder)) 440 | 441 | except Exception as e: 442 | print(i18n("\nAn error occurred: {}").format(str(e))) 443 | import traceback 444 | traceback.print_exc() 445 | sys.exit(1) 446 | 447 | if __name__ == "__main__": 448 | main() 449 | -------------------------------------------------------------------------------- /ViralCutter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "source": [ 6 | "# ViralCutter\n", 7 | "Uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`" 8 | ], 9 | "metadata": { 10 | "id": "pa36OeArowme" 11 | } 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "source": [ 16 | "# Suporte em:\n", 17 | "[![](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)" 18 | ], 19 | "metadata": { 20 | "id": "6Q-ljfsw1unE" 21 | } 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "source": [ 26 | "# TODO📝\n", 27 | "- [x] Release code\n", 28 | "- [ ] Huggingface SpaceDemo\n", 29 | "- [x] Two face in the cut\n", 30 | "- [x] Custom caption and burn\n", 31 | "- [ ] Make the code faster\n", 32 | "- [ ] More types of framing beyond 9:16" 33 | ], 34 | "metadata": { 35 | "id": "0o3KcBT5wzws" 36 | } 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "id": "e76jiRnjONmj", 43 | "cellView": "form" 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "#@title 🛠️ Instalação\n", 48 | "import os\n", 49 | "import subprocess\n", 50 | "import shutil\n", 51 | "from IPython.display import clear_output\n", 52 | "\n", 53 | "# 1. Limpeza TOTAL\n", 54 | "print(\"🧹 Limpando instalação anterior...\")\n", 55 | "%cd /content\n", 56 | "if os.path.exists(\"ViralCutter\"):\n", 57 | " shutil.rmtree(\"ViralCutter\")\n", 58 | "\n", 59 | "!git clone -b dev https://github.com/RafaelGodoyEbert/ViralCutter.git\n", 60 | "%cd /content/ViralCutter\n", 61 | "\n", 62 | "print(\"⏳ Instalando gerenciador UV e drivers do sistema...\")\n", 63 | "\n", 64 | "# 2. Instalar UV e Drivers Linux\n", 65 | "subprocess.run(['pip', 'install', 'uv'], check=True)\n", 66 | "subprocess.run('sudo apt update -y && sudo apt install -y libcudnn8 ffmpeg xvfb', shell=True, check=True)\n", 67 | "\n", 68 | "# 3. Criar Ambiente Virtual\n", 69 | "print(\"⏳ Criando ambiente virtual...\")\n", 70 | "subprocess.run(['uv', 'venv', '.venv'], check=True)\n", 71 | "\n", 72 | "# 4. INSTALAÇÃO DAS DEPENDÊNCIAS\n", 73 | "print(\"⏳ Instalando Bibliotecas...\")\n", 74 | "\n", 75 | "# Passo A: WhisperX e Requisitos Básicos (Deixe instalar o que quiserem)\n", 76 | "cmds_fase_1 = [\n", 77 | " \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n", 78 | " \"uv pip install --python .venv -r requirements.txt\",\n", 79 | " \"uv pip install --python .venv yt-dlp pytubefix\"\n", 80 | "]\n", 81 | "\n", 82 | "for cmd in cmds_fase_1:\n", 83 | " subprocess.run(cmd, shell=True, check=True)\n", 84 | "\n", 85 | "# Passo B: CORREÇÃO DE ALINHAMENTO E GEMINI\n", 86 | "# - google-generativeai: Para o Gemini funcionar\n", 87 | "# - pandas: Para separar palavras\n", 88 | "# - transformers==4.46.3: VERSÃO CRÍTICA. 
Versões mais novas exigem Torch 2.6 e quebram o alinhamento.\n", 89 | "# - accelerate: Ajuda no carregamento do modelo\n", 90 | "print(\"🔨 Aplicando downgrade estratégico no Transformers...\")\n", 91 | "extra_libs = [\n", 92 | " \"uv pip install --python .venv google-generativeai\",\n", 93 | " \"uv pip install --python .venv pandas\",\n", 94 | " \"uv pip install --python .venv onnxruntime-gpu\",\n", 95 | " \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\"\n", 96 | "]\n", 97 | "\n", 98 | "for cmd in extra_libs:\n", 99 | " subprocess.run(cmd, shell=True, check=True)\n", 100 | "\n", 101 | "# Passo C: O MARTELO FINAL (Torch 2.3.1 Estável)\n", 102 | "# Reinstalamos por último para garantir que nada atualizou ele sem querer\n", 103 | "print(\"🔨 Forçando versão estável do Torch (2.3.1)...\")\n", 104 | "cmd_fix_torch = (\n", 105 | " \"uv pip install --python .venv \"\n", 106 | " \"torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 \"\n", 107 | " \"--index-url https://download.pytorch.org/whl/cu121\"\n", 108 | ")\n", 109 | "subprocess.run(cmd_fix_torch, shell=True, check=True)\n", 110 | "\n", 111 | "# Passo D: Trava do Numpy\n", 112 | "print(\"🔨 Travando Numpy...\")\n", 113 | "subprocess.run(\"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\", shell=True, check=True)\n", 114 | "\n", 115 | "# 5. Configurar Monitor\n", 116 | "os.system('Xvfb :1 -screen 0 2560x1440x8 &')\n", 117 | "os.environ['DISPLAY'] = ':1.0'\n", 118 | "\n", 119 | "clear_output()\n", 120 | "print(\"✅ Instalação V7 Finalizada!\")\n", 121 | "print(\"- Transformers 4.46.3 (Compatível com Alinhamento): INSTALADO\")\n", 122 | "print(\"- Torch 2.3.1: ATIVO\")" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "source": [ 128 | "#@title 🚀 Configuração e Execução\n", 129 | "%cd /content/ViralCutter\n", 130 | "import os\n", 131 | "import json\n", 132 | "import subprocess\n", 133 | "\n", 134 | "# --- FIX MATPLOTLIB ---\n", 135 | "os.environ['MPLBACKEND'] = 'Agg'\n", 136 | "\n", 137 | "# --- VERIFICAÇÃO DE PRÉ-VOO ---\n", 138 | "venv_python = \"/content/ViralCutter/.venv/bin/python\"\n", 139 | "print(\"🔍 Verificando bibliotecas críticas...\")\n", 140 | "try:\n", 141 | " subprocess.check_output([venv_python, \"-c\", \"import google.generativeai; print('Gemini OK')\"])\n", 142 | " print(\"✅ Gemini (google-generativeai): OK\")\n", 143 | "except:\n", 144 | " print(\"❌ ERRO: google-generativeai não encontrado!\")\n", 145 | "\n", 146 | "try:\n", 147 | " subprocess.check_output([venv_python, \"-c\", \"import pandas; print('Pandas OK')\"])\n", 148 | " print(\"✅ Pandas (Alinhamento): OK\")\n", 149 | "except:\n", 150 | " print(\"❌ ERRO: Pandas não encontrado (Alinhamento vai falhar)!\")\n", 151 | "\n", 152 | "#@markdown # 🚀 **Configuração Geral**\n", 153 | "url = 'https://youtu.be/1ZnwqFh3CrA?si=zpSig52k11NY7que' # @param {type: \"string\"}\n", 154 | "\n", 155 | "#@markdown ---\n", 156 | "#@markdown ### 🤖 **Inteligência Artificial**\n", 157 | "ai_backend = \"gemini\" #@param [\"gemini\", \"g4f\", \"manual\"]\n", 158 | "api_key = \"\" #@param {type:\"string\"}\n", 159 | "\n", 160 | "#@markdown ---\n", 161 | "#@markdown ### 🎬 **Configurações do Vídeo**\n", 162 | "num_segments = 3 # @param {type:\"number\"}\n", 163 | "viral_mode = True #@param{type:\"boolean\"}\n", 164 | "themes = '' # @param {type:\"string\", placeholder:\"Se `viral_mode` desmarcado, coloque o tema aqui\"}\n", 165 | "model = 'large-v3-turbo' # @param ['base', 'large-v3', 'large-v3-turbo']\n", 166 | "\n", 167 | "#@markdown 
---\n", 168 | "#@markdown ### ⏳ **Configurações de Tempo**\n", 169 | "tempo_minimo = 15 #@param{type: \"number\"}\n", 170 | "tempo_maximo = 90 #@param{type: \"number\"}\n", 171 | "\n", 172 | "#@markdown ---\n", 173 | "#@markdown ### ⚙️ **Configurações Avançadas de Legenda**\n", 174 | "advanced_settings = False #@param {type:\"boolean\"}\n", 175 | "burn_subtitles_option = True # @param {type:\"boolean\"}\n", 176 | "\n", 177 | "#@markdown *(As opções abaixo só funcionam se \"advanced_settings\" estiver ativado)*\n", 178 | "\n", 179 | "#@markdown ### Fonte da legenda\n", 180 | "fonte = \"Arial\" #@param{type: \"string\"}\n", 181 | "#@markdown ### Tamanho da fonte\n", 182 | "base_size = 12 #@param{type: \"number\"}\n", 183 | "#@markdown ### Cor da fonte\n", 184 | "base_color_c = \"FFFFFF\" #@param{type: \"string\"}\n", 185 | "#@markdown ### Transparência da fonte (00=opaco, FF=transparente)\n", 186 | "base_color_t = \"00\" #@param{type: \"string\"}\n", 187 | "\n", 188 | "#@markdown ### Cor do contorno\n", 189 | "contorno_c = \"808080\" #@param{type: \"string\"}\n", 190 | "#@markdown ### Transparência do contorno\n", 191 | "contorno_t = \"00\" #@param{type: \"string\"}\n", 192 | "\n", 193 | "#@markdown ### Tamanho do highlight\n", 194 | "h_size = 14 #@param{type: \"number\"}\n", 195 | "#@markdown ### Cor do destaque\n", 196 | "highlight_color_c = \"00FF00\" #@param{type: \"string\"}\n", 197 | "\n", 198 | "#@markdown ### Palavras por bloco\n", 199 | "palavras_por_bloco = 5 #@param{type: \"number\"}\n", 200 | "#@markdown ### Limite de gap (segundos)\n", 201 | "limite_gap = 0.5 #@param{type: \"number\"}\n", 202 | "#@markdown ### Modo de exibição\n", 203 | "modo = 'highlight' # @param ['highlight', 'sem_higlight', 'palavra_por_palavra']\n", 204 | "#@markdown ### Posição vertical (default 60)\n", 205 | "posicao_vertical = 60 #@param{type: \"number\"}\n", 206 | "#@markdown ### Alinhamento (2=Centro)\n", 207 | "alinhamento = 2 #@param{type: \"slider\", min: 1, max: 3}\n", 208 | "\n", 209 | "#@markdown ### Estilo de borda (1=Contorno, 3=Caixa)\n", 210 | "estilo_da_borda = 3 #@param{type: \"number\"}\n", 211 | "#@markdown ### Espessura do contorno\n", 212 | "espessura_do_contorno = 1.5 #@param{type: \"number\"}\n", 213 | "#@markdown ### Tamanho da sombra\n", 214 | "tamanho_da_sombra = 10 #@param{type: \"number\"}\n", 215 | "\n", 216 | "# Outras configs fixas para avançado\n", 217 | "negrito = 0\n", 218 | "italico = 0\n", 219 | "sublinhado = 0\n", 220 | "tachado = 0\n", 221 | "cor_da_sombra_c = \"000000\"\n", 222 | "cor_da_sombra_t = \"00\"\n", 223 | "highlight_color_t = \"00\"\n", 224 | "\n", 225 | "burn_only = False #@param {type:\"boolean\"}\n", 226 | "\n", 227 | "# Create config file if advanced settings are enabled\n", 228 | "config_json_path = \"subtitle_config.json\"\n", 229 | "use_subtitle_config = False\n", 230 | "\n", 231 | "if advanced_settings:\n", 232 | " def make_color(color_hex, transparency_hex):\n", 233 | " return f\"&H{transparency_hex}\" + color_hex + \"&\"\n", 234 | "\n", 235 | " config_data = {\n", 236 | " \"font\": fonte,\n", 237 | " \"base_size\": base_size,\n", 238 | " \"base_color\": make_color(base_color_c, base_color_t),\n", 239 | " \"highlight_size\": h_size,\n", 240 | " \"highlight_color\": make_color(highlight_color_c, \"00\"),\n", 241 | " \"words_per_block\": palavras_por_bloco,\n", 242 | " \"gap_limit\": limite_gap,\n", 243 | " \"mode\": modo,\n", 244 | " \"vertical_position\": posicao_vertical,\n", 245 | " \"alignment\": alinhamento,\n", 246 | " \"border_style\": 
estilo_da_borda,\n", 247 | " \"outline_thickness\": espessura_do_contorno,\n", 248 | " \"outline_color\": make_color(contorno_c, contorno_t),\n", 249 | " \"shadow_size\": tamanho_da_sombra,\n", 250 | " \"shadow_color\": make_color(cor_da_sombra_c, cor_da_sombra_t),\n", 251 | " \"bold\": negrito,\n", 252 | " \"italic\": italico,\n", 253 | " \"underline\": sublinhado,\n", 254 | " \"strikeout\": tachado\n", 255 | " }\n", 256 | "\n", 257 | " with open(config_json_path, \"w\", encoding=\"utf-8\") as f:\n", 258 | " json.dump(config_data, f, indent=4)\n", 259 | " use_subtitle_config = True\n", 260 | " print(\"Advanced configuration saved.\")\n", 261 | "\n", 262 | "# --- COMANDO ---\n", 263 | "cmd = f\"{venv_python} main_improved.py --url \\\"{url}\\\" --segments {num_segments}\"\n", 264 | "\n", 265 | "if viral_mode:\n", 266 | " cmd += \" --viral\"\n", 267 | "else:\n", 268 | " if themes:\n", 269 | " cmd += f\" --themes \\\"{themes}\\\"\"\n", 270 | "\n", 271 | "cmd += f\" --min-duration {tempo_minimo} --max-duration {tempo_maximo} --model {model}\"\n", 272 | "cmd += f\" --ai-backend {ai_backend}\"\n", 273 | "\n", 274 | "if api_key:\n", 275 | " cmd += f\" --api-key \\\"{api_key}\\\"\"\n", 276 | "\n", 277 | "if burn_only:\n", 278 | " cmd += \" --burn-only\"\n", 279 | "\n", 280 | "cmd += \" --workflow 1\"\n", 281 | "cmd += \" --face-model insightface\"\n", 282 | "cmd += \" --face-mode auto\"\n", 283 | "cmd += \" --skip-prompts\"\n", 284 | "\n", 285 | "if use_subtitle_config:\n", 286 | " cmd += f\" --subtitle-config {config_json_path}\"\n", 287 | "\n", 288 | "print(f\"Executing: {cmd}\")\n", 289 | "print(\"Starting ViralCutter Pipeline...\")\n", 290 | "\n", 291 | "!{cmd}\n", 292 | "\n", 293 | "print(\"Process finished!\")" 294 | ], 295 | "metadata": { 296 | "cellView": "form", 297 | "id": "DNn8ZKJdD5XG" 298 | }, 299 | "execution_count": null, 300 | "outputs": [] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "source": [ 305 | "# Resultado em vídeo" 306 | ], 307 | "metadata": { 308 | "id": "QWVaFdjIGY4i" 309 | } 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": { 315 | "id": "y3kiEMLqoHJ7", 316 | "cellView": "form" 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "#@title Resultado em vídeo\n", 321 | "#@markdown
Escolha o número do vídeo e o nome do projeto\n", 322 | "from IPython.display import HTML\n", 323 | "from base64 import b64encode\n", 324 | "import json\n", 325 | "import os\n", 326 | "\n", 327 | "# --- PARÂMETROS ---\n", 328 | "nome_do_projeto = \"Unknown_Video\" #@param {type:\"string\"}\n", 329 | "numero_do_video = 0 #@param {type:\"number\"}\n", 330 | "tipo_de_video = \"Legendas queimadas\" # @param [\"Legendas queimadas\", \"Video limpo\"]\n", 331 | "\n", 332 | "# --- DEFINIÇÃO DOS CAMINHOS ---\n", 333 | "# Define a pasta raiz do projeto ex: /content/ViralCutter/VIRALS/Unknown_Video\n", 334 | "base_dir = f'/content/ViralCutter/VIRALS/{nome_do_projeto}'\n", 335 | "\n", 336 | "# Define o caminho exato do arquivo de segmentos dentro da pasta do projeto\n", 337 | "segments_path = f'{base_dir}/viral_segments.txt'\n", 338 | "\n", 339 | "if tipo_de_video == \"Legendas queimadas\":\n", 340 | " pasta_video = 'burned_sub'\n", 341 | " sufix = 'processed_subtitled'\n", 342 | "else:\n", 343 | " pasta_video = 'final'\n", 344 | " sufix = 'processed'\n", 345 | "\n", 346 | "# --- FUNÇÕES ---\n", 347 | "\n", 348 | "def show_video(video_path, video_width=400):\n", 349 | " if not os.path.exists(video_path):\n", 350 | " return HTML(f\"
Erro: Vídeo não encontrado! Caminho tentado: {video_path}
\")\n", 351 | "\n", 352 | " video_file = open(video_path, \"r+b\").read()\n", 353 | " video_url = f\"data:video/mp4;base64,{b64encode(video_file).decode()}\"\n", 354 | " return HTML(f\"\"\"\"\"\")\n", 355 | "\n", 356 | "# Carregar informações do arquivo de texto (JSON)\n", 357 | "segments_data = None\n", 358 | "if os.path.exists(segments_path):\n", 359 | " try:\n", 360 | " with open(segments_path, 'r') as file:\n", 361 | " segments_data = json.load(file)\n", 362 | " except Exception as e:\n", 363 | " print(f\"Erro ao ler o arquivo JSON: {e}\")\n", 364 | "else:\n", 365 | " print(f\"ALERTA: Arquivo '{segments_path}' não encontrado. As informações de texto não serão exibidas.\")\n", 366 | "\n", 367 | "# Função para exibir o vídeo e as informações\n", 368 | "def display_video_with_segment(index):\n", 369 | " # Exibir textos se o JSON foi carregado\n", 370 | " if segments_data:\n", 371 | " try:\n", 372 | " segment = segments_data['segments'][index]\n", 373 | " print(f\"Título: {segment.get('title', 'Sem título')}\")\n", 374 | " print(f\"Descrição: {segment.get('description', 'Sem descrição')}\")\n", 375 | " print(f\"Score: {segment.get('score', 'N/A')}\")\n", 376 | " print(\"-\" * 30)\n", 377 | " except IndexError:\n", 378 | " print(f\"Aviso: Não há informações de texto para o índice {index}.\")\n", 379 | " except KeyError:\n", 380 | " print(\"Aviso: Formato do JSON incorreto.\")\n", 381 | "\n", 382 | " # Caminho do vídeo\n", 383 | " # Ex: /content/ViralCutter/VIRALS/Unknown_Video/burned_sub/final-output000_processed_subtitled.mp4\n", 384 | " nome_arquivo = f'final-output{str(index).zfill(3)}_{sufix}.mp4'\n", 385 | " caminho_completo_video = f'{base_dir}/{pasta_video}/{nome_arquivo}'\n", 386 | "\n", 387 | " return show_video(caminho_completo_video)\n", 388 | "\n", 389 | "# Executar\n", 390 | "display_video_with_segment(numero_do_video)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "source": [ 396 | "## Baixar Zip | Off" 397 | ], 398 | "metadata": { 399 | "id": "JXEYU0p6li2G" 400 | } 401 | }, 402 | { 403 | "cell_type": "code", 404 | "source": [ 405 | "#@title Baixar todos os virais gerados\n", 406 | "# prompt: Zipar e baixar todos os arquivos da pasta /content/final\n", 407 | "\n", 408 | "!zip -r /content/final.zip /content/ViralCutter/final\n", 409 | "from google.colab import files\n", 410 | "files.download(\"/content/final.zip\")\n" 411 | ], 412 | "metadata": { 413 | "cellView": "form", 414 | "id": "yV7r-PC7nmZD" 415 | }, 416 | "execution_count": null, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "source": [ 422 | "#@title Baixar somente Cortes com resolução original 💻\n", 423 | "#@markdown Logo vai baixar os vídeos que são somente os cortes, sem a edição e foco no rosto\n", 424 | "import os\n", 425 | "import zipfile\n", 426 | "\n", 427 | "# Definir o diretório onde os vídeos estão localizados\n", 428 | "directory = '/content/ViralCutter/tmp'\n", 429 | "zip_filename = '/content/ViralCutter/videos_original_scale.zip'\n", 430 | "\n", 431 | "# Criar um arquivo zip\n", 432 | "with zipfile.ZipFile(zip_filename, 'w') as zipf:\n", 433 | " # Iterar sobre os arquivos no diretório\n", 434 | " for root, dirs, files in os.walk(directory):\n", 435 | " for file in files:\n", 436 | " if file.endswith('_original_scale.mp4'):\n", 437 | " # Adicionar o arquivo ao zip\n", 438 | " zipf.write(os.path.join(root, file), arcname=file)\n", 439 | "\n", 440 | "# Fazer o download do arquivo zip\n", 441 | "from google.colab import files\n", 442 | "files.download(zip_filename)\n" 
443 | ], 444 | "metadata": { 445 | "cellView": "form", 446 | "id": "X0b_jKmX2y27" 447 | }, 448 | "execution_count": null, 449 | "outputs": [] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "source": [ 454 | "#@title Baixar todos os virais gerados (com legendas queimadas)\n", 455 | "\n", 456 | "!zip -r /content/burned_sub.zip /content/ViralCutter/burned_sub\n", 457 | "from google.colab import files\n", 458 | "files.download(\"/content/burned_sub.zip\")\n" 459 | ], 460 | "metadata": { 461 | "id": "22ebNo4GHVYU", 462 | "cellView": "form" 463 | }, 464 | "execution_count": null, 465 | "outputs": [] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": { 470 | "id": "nEIsMZLwJ5kD" 471 | }, 472 | "source": [ 473 | "#Créditos\n", 474 | "\n", 475 | "Inspirado no [reels clips automator](https://github.com/eddieoz/reels-clips-automator) e no [YoutubeVideoToAIPoweredShorts](https://github.com/Fitsbit/YoutubeVideoToAIPoweredShorts)
\n", 476 | "\n", 477 | "---\n", 478 | "![Rafa.png](https://i.imgur.com/cGknQpU.png;base64)\n", 479 | "\n", 480 | "Desenvolvido por **Rafa.Godoy**
\n", 481 | "[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/rafaelGodoyEbert)
\n", 482 | "[ ![X](https://img.shields.io/twitter/url?url=https%3A%2F%2Ftwitter.com%2FGodoyEbert) ](https://twitter.com/GodoyEbert)
\n", 483 | "[Instagram](https://www.instagram.com/rafael.godoy.ebert/)
\n", 484 | "[ ![](https://dcbadge.vercel.app/api/server/aihubbrasil) ](https://discord.gg/aihubbrasil)" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "source": [ 490 | "`0.7v Alpha`
\n", 491 | "\n", 492 | "Apenas uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`
\n" 493 | ], 494 | "metadata": { 495 | "id": "tpLJmPqGT5_u" 496 | } 497 | } 498 | ], 499 | "metadata": { 500 | "accelerator": "GPU", 501 | "colab": { 502 | "gpuType": "T4", 503 | "provenance": [] 504 | }, 505 | "kernelspec": { 506 | "display_name": "Python 3", 507 | "name": "python3" 508 | }, 509 | "language_info": { 510 | "name": "python" 511 | } 512 | }, 513 | "nbformat": 4, 514 | "nbformat_minor": 0 515 | } -------------------------------------------------------------------------------- /scripts/edit_video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os 4 | import subprocess 5 | import mediapipe as mp 6 | from scripts.one_face import crop_and_resize_single_face, resize_with_padding, detect_face_or_body 7 | from scripts.two_face import crop_and_resize_two_faces, detect_face_or_body_two_faces 8 | try: 9 | from scripts.face_detection_insightface import init_insightface, detect_faces_insightface, crop_and_resize_insightface 10 | INSIGHTFACE_AVAILABLE = True 11 | except ImportError: 12 | INSIGHTFACE_AVAILABLE = False 13 | print("InsightFace not found or error importing. Install with: pip install insightface onnxruntime-gpu") 14 | 15 | def get_center_bbox(bbox): 16 | # bbox: [x1, y1, x2, y2] 17 | return ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2) 18 | 19 | def get_center_rect(rect): 20 | # rect: (x, y, w, h) 21 | return (rect[0] + rect[2] / 2, rect[1] + rect[3] / 2) 22 | 23 | def sort_by_proximity(new_faces, old_faces, center_func): 24 | """ 25 | Sorts new_faces to match the order of old_faces based on distance. 26 | new_faces: list of face objects (bbox or tuple) 27 | old_faces: list of face objects (bbox or tuple) 28 | center_func: function that takes a face object and returns (cx, cy) 29 | """ 30 | if not old_faces or len(old_faces) != 2 or len(new_faces) != 2: 31 | return new_faces 32 | 33 | old_c1 = center_func(old_faces[0]) 34 | old_c2 = center_func(old_faces[1]) 35 | 36 | new_c1 = center_func(new_faces[0]) 37 | new_c2 = center_func(new_faces[1]) 38 | 39 | # Cost if we keep order: [new1, new2] 40 | # dist(old1, new1) + dist(old2, new2) 41 | dist_keep = ((old_c1[0]-new_c1[0])**2 + (old_c1[1]-new_c1[1])**2) + \ 42 | ((old_c2[0]-new_c2[0])**2 + (old_c2[1]-new_c2[1])**2) 43 | 44 | # Cost if we swap: [new2, new1] 45 | # dist(old1, new2) + dist(old2, new1) 46 | dist_swap = ((old_c1[0]-new_c2[0])**2 + (old_c1[1]-new_c2[1])**2) + \ 47 | ((old_c2[0]-new_c1[0])**2 + (old_c2[1]-new_c1[1])**2) 48 | 49 | # If swapping reduces total movement distance, do it 50 | if dist_swap < dist_keep: 51 | return [new_faces[1], new_faces[0]] 52 | 53 | return new_faces 54 | 55 | def generate_short_fallback(input_file, output_file, index, project_folder, final_folder): 56 | """Fallback function: Center Crop if MediaPipe fails.""" 57 | print(f"Processing (Center Crop Fallback): {input_file}") 58 | cap = cv2.VideoCapture(input_file) 59 | if not cap.isOpened(): 60 | print(f"Error opening video: {input_file}") 61 | return 62 | 63 | fps = cap.get(cv2.CAP_PROP_FPS) 64 | width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | 67 | # Target dimensions (9:16) 68 | target_width = 1080 69 | target_height = 1920 70 | 71 | # Use FFmpeg Pipe instead of cv2.VideoWriter to avoid OpenCV backend errors 72 | ffmpeg_cmd = [ 73 | 'ffmpeg', '-y', '-loglevel', 'error', '-hide_banner', '-stats', 74 | '-f', 'rawvideo', 75 | '-vcodec', 'rawvideo', 76 | '-s', f'{target_width}x{target_height}', 77 | 
'-pix_fmt', 'bgr24', 78 | '-r', str(fps), 79 | '-i', '-', 80 | '-c:v', 'libx264', # or h264_nvenc if available 81 | '-preset', 'fast', 82 | '-pix_fmt', 'yuv420p', 83 | output_file 84 | ] 85 | 86 | process = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE) 87 | 88 | while True: 89 | ret, frame = cap.read() 90 | if not ret: 91 | break 92 | 93 | # Resize mantendo aspect ratio para cobrir altura 1920 94 | scale_factor = target_height / height 95 | # Se após o resize a largura for menor que 1080, escala pela largura 96 | if width * scale_factor < target_width: 97 | scale_factor = target_width / width 98 | 99 | # Garante dimensoes inteiras 100 | new_w = int(width * scale_factor) 101 | new_h = int(height * scale_factor) 102 | 103 | resized = cv2.resize(frame, (new_w, new_h)) 104 | 105 | # Crop center 106 | res_h, res_w, _ = resized.shape 107 | start_x = (res_w - target_width) // 2 108 | start_y = (res_h - target_height) // 2 109 | 110 | if start_x < 0: start_x = 0 111 | if start_y < 0: start_y = 0 112 | 113 | cropped = resized[start_y:start_y+target_height, start_x:start_x+target_width] 114 | 115 | # Resize final por segurança e validação 116 | if cropped.shape[1] != target_width or cropped.shape[0] != target_height: 117 | cropped = cv2.resize(cropped, (target_width, target_height)) 118 | 119 | try: 120 | # Write raw bytes to ffmpeg stdin 121 | process.stdin.write(cropped.tobytes()) 122 | except Exception as e: 123 | print(f"Error writing frame to ffmpeg pipe: {e}") 124 | pass 125 | 126 | cap.release() 127 | process.stdin.close() 128 | process.wait() 129 | 130 | finalize_video(input_file, output_file, index, fps, project_folder, final_folder) 131 | 132 | def finalize_video(input_file, output_file, index, fps, project_folder, final_folder): 133 | """Mux audio and video.""" 134 | audio_file = os.path.join(project_folder, "cuts", f"output-audio-{index}.aac") 135 | subprocess.run(["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-i", input_file, "-vn", "-acodec", "copy", audio_file], 136 | check=False, capture_output=True) 137 | 138 | if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0: 139 | final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4") 140 | command = [ 141 | "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats", 142 | "-i", output_file, 143 | "-i", audio_file, 144 | "-c:v", "h264_nvenc", "-preset", "fast", "-b:v", "5M", 145 | "-c:a", "aac", "-b:a", "192k", 146 | "-r", str(fps), 147 | final_output 148 | ] 149 | try: 150 | subprocess.run(command, check=True) #, capture_output=True) 151 | print(f"Final file generated: {final_output}") 152 | try: 153 | os.remove(audio_file) 154 | os.remove(output_file) 155 | except: 156 | pass 157 | except subprocess.CalledProcessError as e: 158 | print(f"Error muxing: {e}") 159 | else: 160 | print(f"Warning: No audio extracted for {input_file}") 161 | 162 | 163 | def generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=None): 164 | try: 165 | cap = cv2.VideoCapture(input_file) 166 | if not cap.isOpened(): 167 | print(f"Error opening video: {input_file}") 168 | return 169 | 170 | fps = cap.get(cv2.CAP_PROP_FPS) 171 | frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 172 | frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 173 | total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 174 | 175 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 176 | out = cv2.VideoWriter(output_file, fourcc, fps, 
(1080, 1920)) 177 | 178 | next_detection_frame = 0 179 | current_interval = int(5 * fps) # Initial guess 180 | 181 | # Initial Interval Logic if predefined 182 | 183 | if detection_period is not None: 184 | current_interval = max(1, int(detection_period * fps)) 185 | elif face_mode == "2": 186 | current_interval = int(1.0 * fps) 187 | 188 | last_detected_faces = None 189 | last_frame_face_positions = None 190 | frames_since_last_detection = 0 191 | max_frames_without_detection = int(5 * fps) # Fallback timeout 192 | 193 | transition_duration = int(fps) 194 | transition_frames = [] 195 | 196 | for frame_index in range(total_frames): 197 | ret, frame = cap.read() 198 | if not ret or frame is None: 199 | break 200 | 201 | if frame_index >= next_detection_frame: 202 | # Detect ALL faces (up to 2 in our implementation) 203 | detections = detect_face_or_body_two_faces(frame, face_detection, face_mesh, pose) 204 | 205 | # Dynamic Logic 206 | target_faces = 1 207 | if face_mode == "2": 208 | target_faces = 2 209 | elif face_mode == "auto": 210 | if detections and len(detections) >= 2: 211 | target_faces = 2 212 | else: 213 | target_faces = 1 214 | 215 | # Filter detections based on target 216 | current_detections = [] 217 | if detections: 218 | # Sort detections by approximate Area (w*h) descending to pick main faces first 219 | detections.sort(key=lambda s: s[2] * s[3], reverse=True) 220 | 221 | if len(detections) >= target_faces: 222 | current_detections = detections[:target_faces] 223 | elif len(detections) > 0: 224 | # Fallback 225 | current_detections = detections[:1] 226 | target_faces = 1 227 | 228 | # Apply Consistency Check (Proximity) 229 | if target_faces == 2 and len(current_detections) == 2: 230 | if last_detected_faces is not None and len(last_detected_faces) == 2: 231 | current_detections = sort_by_proximity(current_detections, last_detected_faces, get_center_rect) 232 | 233 | # Check for stability/lookahead could go here but skipping for brevity unless requested. 
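# (Illustrative sketch only, not part of the original logic: the stability/lookahead check
#  mentioned above could require several consecutive detections to agree on the face count
#  before the layout is allowed to switch, e.g.
#      if last_detected_faces is not None and len(current_detections) != len(last_detected_faces):
#          pending_switches += 1              # hypothetical counter, initialized to 0 above
#          if pending_switches < 3:           # wait for 3 agreeing detections before switching
#              current_detections = last_detected_faces
#      else:
#          pending_switches = 0
#  The name pending_switches is hypothetical and would need to be initialized alongside
#  last_detected_faces for this to run.)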
234 | 235 | if current_detections and len(current_detections) == target_faces: 236 | if last_frame_face_positions is not None: 237 | start_faces = np.array(last_frame_face_positions) 238 | end_faces = np.array(current_detections) 239 | try: 240 | transition_frames = np.linspace(start_faces, end_faces, transition_duration, dtype=int) 241 | except Exception as e: 242 | # Fallback if shapes mismatch unexpectedly 243 | transition_frames = [] 244 | else: 245 | transition_frames = [] 246 | last_detected_faces = current_detections 247 | frames_since_last_detection = 0 248 | else: 249 | frames_since_last_detection += 1 250 | 251 | # Update next detection frame 252 | step = 5 253 | 254 | if detection_period is not None: 255 | if isinstance(detection_period, dict): 256 | # If we are targeting 2 faces, we use '2' interval, else '1' 257 | key = str(target_faces) 258 | val = detection_period.get(key, detection_period.get('1', 0.2)) 259 | step = max(1, int(val * fps)) 260 | else: 261 | step = max(1, int(detection_period * fps)) 262 | elif target_faces == 2: 263 | step = int(1.0 * fps) 264 | else: 265 | step = int(5) # 5 frames for 1 face 266 | 267 | next_detection_frame = frame_index + step 268 | 269 | if len(transition_frames) > 0: 270 | current_faces = transition_frames[0] 271 | transition_frames = transition_frames[1:] 272 | elif last_detected_faces is not None and frames_since_last_detection <= max_frames_without_detection: 273 | current_faces = last_detected_faces 274 | else: 275 | result = resize_with_padding(frame) 276 | out.write(result) 277 | continue 278 | 279 | last_frame_face_positions = current_faces 280 | 281 | if hasattr(current_faces, '__len__') and len(current_faces) == 2: 282 | result = crop_and_resize_two_faces(frame, current_faces) 283 | else: 284 | # Ensure it's list of tuples or single tuple? current_faces is list of tuples from detection 285 | # If 1 face: [ (x,y,w,h) ] 286 | if hasattr(current_faces, '__len__') and len(current_faces) > 0: 287 | f = current_faces[0] 288 | result = crop_and_resize_single_face(frame, f) 289 | else: 290 | result = resize_with_padding(frame) 291 | 292 | out.write(result) 293 | 294 | cap.release() 295 | out.release() 296 | 297 | finalize_video(input_file, output_file, index, fps, project_folder, final_folder) 298 | 299 | except Exception as e: 300 | print(f"Error in MediaPipe processing: {e}") 301 | raise e # Rethrow to trigger fallback 302 | 303 | def generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=None): 304 | """Face detection using OpenCV Haar Cascades.""" 305 | print(f"Processing (Haar Cascade): {input_file}") 306 | 307 | # Load Haar Cascade 308 | cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml' 309 | face_cascade = cv2.CascadeClassifier(cascade_path) 310 | if face_cascade.empty(): 311 | print("Error: Could not load Haar Cascade XML. 
Falling back to center crop.") 312 | generate_short_fallback(input_file, output_file, index, project_folder, final_folder) 313 | return 314 | 315 | cap = cv2.VideoCapture(input_file) 316 | if not cap.isOpened(): 317 | print(f"Error opening video: {input_file}") 318 | return 319 | 320 | fps = cap.get(cv2.CAP_PROP_FPS) 321 | total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 322 | 323 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 324 | out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920)) 325 | 326 | # Logic copied from generate_short_mediapipe 327 | detection_interval = int(2 * fps) # Default check every 2 seconds 328 | if detection_period is not None: 329 | detection_interval = max(1, int(detection_period * fps)) 330 | last_detected_faces = None 331 | last_frame_face_positions = None 332 | frames_since_last_detection = 0 333 | max_frames_without_detection = int(5 * fps) 334 | 335 | transition_duration = int(fps) # 1 second smooth transition 336 | transition_frames = [] 337 | 338 | for frame_index in range(total_frames): 339 | ret, frame = cap.read() 340 | if not ret or frame is None: 341 | break 342 | 343 | if frame_index % detection_interval == 0: 344 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 345 | faces = face_cascade.detectMultiScale(gray, 1.1, 4) 346 | 347 | detections = [] 348 | if len(faces) > 0: 349 | # Pick largest face 350 | largest_face = max(faces, key=lambda f: f[2] * f[3]) 351 | # Ensure int type 352 | detections = [tuple(map(int, largest_face))] 353 | 354 | if detections: 355 | if last_frame_face_positions is not None: 356 | # Simple linear interpolation for smoothing 357 | start_faces = np.array(last_frame_face_positions) 358 | end_faces = np.array(detections) 359 | 360 | # Generate transition frames 361 | steps = transition_duration 362 | transition_frames = [] 363 | for s in range(steps): 364 | t = (s + 1) / steps 365 | interp = (1 - t) * start_faces + t * end_faces 366 | transition_frames.append(interp.astype(int).tolist()) # Convert back to list of lists/tuples 367 | else: 368 | transition_frames = [] 369 | last_detected_faces = detections 370 | frames_since_last_detection = 0 371 | else: 372 | frames_since_last_detection += 1 373 | 374 | if len(transition_frames) > 0: 375 | current_faces = transition_frames[0] 376 | transition_frames = transition_frames[1:] 377 | elif last_detected_faces is not None and frames_since_last_detection <= max_frames_without_detection: 378 | current_faces = last_detected_faces 379 | else: 380 | # No face detected for a while -> Center/Padding fallback 381 | result = resize_with_padding(frame) 382 | out.write(result) 383 | continue 384 | 385 | last_frame_face_positions = current_faces 386 | # haar detections are list containing one tuple (x,y,w,h) 387 | # current_faces is list of one tuple 388 | if isinstance(current_faces, list): 389 | face_bbox = current_faces[0] 390 | else: 391 | face_bbox = current_faces # Should be handled 392 | 393 | result = crop_and_resize_single_face(frame, face_bbox) 394 | out.write(result) 395 | 396 | cap.release() 397 | out.release() 398 | 399 | finalize_video(input_file, output_file, index, fps, project_folder, final_folder) 400 | 401 | def generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode="auto", detection_period=None): 402 | """Face detection using InsightFace (SOTA).""" 403 | print(f"Processing (InsightFace): {input_file} | Mode: {face_mode}") 404 | 405 | cap = cv2.VideoCapture(input_file) 406 | if not cap.isOpened(): 407 | print(f"Error opening 
video: {input_file}") 408 | return 409 | 410 | fps = cap.get(cv2.CAP_PROP_FPS) 411 | total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 412 | 413 | # Using mp4v for container, but final mux will fix encoding 414 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 415 | out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920)) 416 | 417 | # Dynamic Interval Logic 418 | next_detection_frame = 0 419 | 420 | last_detected_faces = None 421 | last_frame_face_positions = None 422 | frames_since_last_detection = 0 423 | max_frames_without_detection = 90 # 3 seconds timeout 424 | 425 | transition_duration = 4 # Smooth transition over 4 frames (almost continuous) 426 | transition_frames = [] 427 | 428 | # Current state of face mode (1 or 2) 429 | # If auto, we decide per detection interval 430 | current_num_faces_state = 1 431 | if face_mode == "2": 432 | current_num_faces_state = 2 433 | 434 | frame_1_face_count = 0 435 | frame_2_face_count = 0 436 | 437 | buffered_frame = None 438 | 439 | # Timeline tracking: list of (frame_index, mode_str) 440 | # We will compress this later. 441 | timeline_frames = [] # Store mode for *every written frame* or at least detection points 442 | 443 | for frame_index in range(total_frames): 444 | if buffered_frame is not None: 445 | frame = buffered_frame 446 | ret = True 447 | buffered_frame = None 448 | else: 449 | ret, frame = cap.read() 450 | 451 | if not ret or frame is None: 452 | break 453 | 454 | if frame_index >= next_detection_frame and len(transition_frames) == 0: 455 | # Detect faces 456 | faces = detect_faces_insightface(frame) 457 | 458 | # Decide 1 or 2 faces 459 | target_faces = 1 460 | if face_mode == "2": 461 | target_faces = 2 462 | elif face_mode == "auto": 463 | if len(faces) >= 2: 464 | target_faces = 2 465 | else: 466 | target_faces = 1 467 | 468 | # Fallback Lookahead: If detection fails or partial 469 | if len(faces) < target_faces: 470 | # Try 1 frame ahead 471 | ret2, frame2 = cap.read() 472 | if ret2 and frame2 is not None: 473 | faces2 = detect_faces_insightface(frame2) 474 | # If lookahead found what we wanted OR found something better than nothing 475 | if len(faces2) >= target_faces: 476 | faces = faces2 # Use lookahead faces for current frame 477 | # (This assumes movement is small enough between 1 frame to be valid) 478 | elif len(faces) == 0 and len(faces2) > 0: 479 | faces = faces2 # Better than nothing 480 | 481 | buffered_frame = frame2 # Store for next iteration 482 | 483 | detections = [] 484 | 485 | if len(faces) >= target_faces: 486 | # Pick top N faces by area 487 | faces_sorted = sorted(faces, key=lambda f: (f['bbox'][2]-f['bbox'][0]) * (f['bbox'][3]-f['bbox'][1]), reverse=True) 488 | 489 | if target_faces == 2: 490 | # Convert [x1, y1, x2, y2] to (x, y, w, h) for two_face compatibility logic or custom logic 491 | # We will store [x1, y1, x2, y2] for interpolation, and convert during crop 492 | 493 | # Ensure we have 2 faces 494 | f1 = faces_sorted[0]['bbox'] 495 | f2 = faces_sorted[1]['bbox'] 496 | 497 | if last_detected_faces is not None and len(last_detected_faces) == 2: 498 | detections = sort_by_proximity([f1, f2], last_detected_faces, get_center_bbox) 499 | else: 500 | detections = [f1, f2] 501 | 502 | current_num_faces_state = 2 503 | else: 504 | # 1 face 505 | detections = [faces_sorted[0]['bbox']] 506 | current_num_faces_state = 1 507 | else: 508 | # If we wanted 2 but found 1, or wanted 1 found 0 509 | if len(faces) > 0: 510 | # Fallback to 1 face if found at least 1 511 | faces_sorted = sorted(faces, key=lambda f: 
(f['bbox'][2]-f['bbox'][0]) * (f['bbox'][3]-f['bbox'][1]), reverse=True) 512 | detections = [faces_sorted[0]['bbox']] 513 | current_num_faces_state = 1 514 | else: 515 | detections = [] 516 | 517 | if detections: 518 | if last_frame_face_positions is not None and len(last_frame_face_positions) == len(detections): 519 | # Transition 520 | start_faces = np.array(last_frame_face_positions) 521 | end_faces = np.array(detections) 522 | 523 | steps = transition_duration 524 | transition_frames = [] 525 | for s in range(steps): 526 | t = (s + 1) / steps 527 | interp = (1 - t) * start_faces + t * end_faces 528 | transition_frames.append(interp.astype(int).tolist()) 529 | else: 530 | # Reset transition if face count changed or first detect 531 | transition_frames = [] 532 | last_detected_faces = detections 533 | frames_since_last_detection = 0 534 | else: 535 | frames_since_last_detection += 1 536 | 537 | # Update next detection frame based on NEW state 538 | step = 5 # Default fallback (very fast) 539 | 540 | if detection_period is not None: 541 | if isinstance(detection_period, dict): 542 | # Period depends on state 543 | key = str(current_num_faces_state) 544 | # fallback to '1' if key not found (should be there) 545 | val = detection_period.get(key, detection_period.get('1', 0.2)) 546 | step = max(1, int(val * fps)) 547 | else: 548 | # Legacy float support (should not happen with new main.py but good safety) 549 | step = max(1, int(detection_period * fps)) 550 | elif current_num_faces_state == 2: 551 | step = int(1.0 * fps) # 1s for 2 faces 552 | else: 553 | step = 5 # 5 frames for 1 face (~0.16s at 30fps) 554 | 555 | next_detection_frame = frame_index + step 556 | 557 | if len(transition_frames) > 0: 558 | current_faces = transition_frames[0] 559 | transition_frames = transition_frames[1:] 560 | elif last_detected_faces is not None and frames_since_last_detection <= max_frames_without_detection: 561 | current_faces = last_detected_faces 562 | else: 563 | # Fallback for this frame 564 | result = resize_with_padding(frame) 565 | out.write(result) 566 | continue 567 | 568 | last_frame_face_positions = current_faces 569 | 570 | target_len = len(current_faces) 571 | 572 | if target_len == 2: 573 | frame_2_face_count += 1 574 | # Convert [x1, y1, x2, y2] to (x, y, w, h) 575 | f1 = current_faces[0] 576 | f2 = current_faces[1] 577 | rect1 = (f1[0], f1[1], f1[2]-f1[0], f1[3]-f1[1]) 578 | rect2 = (f2[0], f2[1], f2[2]-f2[0], f2[3]-f2[1]) 579 | result = crop_and_resize_two_faces(frame, [rect1, rect2]) 580 | timeline_frames.append((frame_index, "2")) 581 | else: 582 | frame_1_face_count += 1 583 | # 1 face 584 | # current_faces[0] is [x1, y1, x2, y2] 585 | result = crop_and_resize_insightface(frame, current_faces[0]) 586 | timeline_frames.append((frame_index, "1")) 587 | 588 | out.write(result) 589 | 590 | cap.release() 591 | out.release() 592 | 593 | # Compress timeline into segments 594 | # [(start_time, end_time, mode), ...] 
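# Illustrative example (hypothetical numbers): at 30 fps, if frames 0-59 were written in
# mode "1" and frames 60-89 in mode "2", the compressed timeline below becomes
#   [{"start": 0.0, "end": 1.967, "mode": "1"}, {"start": 2.0, "end": 3.0, "mode": "2"}]
# (intermediate segments end at the timestamp of their last frame; the final segment ends at last frame + 1).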
595 | compressed_timeline = [] 596 | if timeline_frames: 597 | curr_mode = timeline_frames[0][1] 598 | start_f = timeline_frames[0][0] 599 | 600 | for i in range(1, len(timeline_frames)): 601 | frame_idx, mode = timeline_frames[i] 602 | if mode != curr_mode: 603 | # End current segment 604 | # Convert frame to seconds 605 | end_f = timeline_frames[i-1][0] 606 | compressed_timeline.append({ 607 | "start": float(start_f) / fps, 608 | "end": float(end_f) / fps, # or frame_idx / fps for continuity 609 | "mode": curr_mode 610 | }) 611 | # Start new 612 | curr_mode = mode 613 | start_f = frame_idx 614 | 615 | # Add last 616 | end_f = timeline_frames[-1][0] 617 | compressed_timeline.append({ 618 | "start": float(start_f) / fps, 619 | "end": (float(end_f) + 1) / fps, 620 | "mode": curr_mode 621 | }) 622 | 623 | # Save timeline JSON 624 | timeline_file = output_file.replace(".mp4", "_timeline.json") 625 | try: 626 | import json 627 | with open(timeline_file, "w") as f: 628 | json.dump(compressed_timeline, f) 629 | print(f"Timeline saved: {timeline_file}") 630 | except Exception as e: 631 | print(f"Error saving timeline: {e}") 632 | 633 | finalize_video(input_file, output_file, index, fps, project_folder, final_folder) 634 | 635 | # Return dominant mode logic (or keep 15% rule as overall fallback) 636 | if frame_2_face_count > (total_frames * 0.15): 637 | return "2" 638 | return "1" 639 | 640 | 641 | def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None): 642 | mp_face_detection = mp.solutions.face_detection 643 | mp_face_mesh = mp.solutions.face_mesh 644 | mp_pose = mp.solutions.pose 645 | 646 | index = 0 647 | cuts_folder = os.path.join(project_folder, "cuts") 648 | final_folder = os.path.join(project_folder, "final") 649 | os.makedirs(final_folder, exist_ok=True) 650 | 651 | face_modes_log = {} 652 | 653 | # Priority: User Choice -> Fallbacks 654 | 655 | insightface_working = False 656 | 657 | # Only init InsightFace if selected or default 658 | if INSIGHTFACE_AVAILABLE and (face_model == "insightface"): 659 | try: 660 | print("Initializing InsightFace...") 661 | init_insightface() 662 | insightface_working = True 663 | print("InsightFace Initialized Successfully.") 664 | except Exception as e: 665 | print(f"WARNING: InsightFace Initialization Failed ({e}). Will try MediaPipe.") 666 | insightface_working = False 667 | 668 | mediapipe_working = False 669 | use_haar = False 670 | 671 | # If insightface failed OR user chose mediapipe, init mediapipe 672 | should_use_mediapipe = (face_model == "mediapipe") or (face_model == "insightface" and not insightface_working) 673 | 674 | if should_use_mediapipe: 675 | try: 676 | # Try to init with model_selection=0 (Short Range) 677 | with mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5) as fd: 678 | pass 679 | mediapipe_working = True 680 | print("MediaPipe Initialized Successfully.") 681 | except Exception as e: 682 | print(f"WARNING: MediaPipe Initialization Failed ({e}). 
Switching to OpenCV Haar Cascade.") 683 | mediapipe_working = False 684 | use_haar = True 685 | 686 | # Logic for MediaPipe replaced by dynamic pass 687 | # mp_num_faces = 2 if face_mode == "2" else 1 688 | 689 | while True: 690 | input_filename = f"output{str(index).zfill(3)}_original_scale.mp4" 691 | input_file = os.path.join(cuts_folder, input_filename) 692 | output_file = os.path.join(final_folder, f"temp_video_no_audio_{index}.mp4") 693 | 694 | if os.path.exists(input_file): 695 | success = False 696 | detected_mode = "1" # Default if detection fails or fallback 697 | 698 | # 1. Try InsightFace 699 | if insightface_working: 700 | try: 701 | # Capture returned mode 702 | res = generate_short_insightface(input_file, output_file, index, project_folder, final_folder, face_mode=face_mode, detection_period=detection_period) 703 | if res: detected_mode = res 704 | success = True 705 | except Exception as e: 706 | print(f"InsightFace processing failed for {input_filename}: {e}") 707 | print("Falling back to MediaPipe/Haar...") 708 | 709 | # 2. Try MediaPipe if InsightFace failed or not available 710 | if not success and mediapipe_working: 711 | try: 712 | with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.2) as face_detection, \ 713 | mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=2, refine_landmarks=True, min_detection_confidence=0.2, min_tracking_confidence=0.2) as face_mesh, \ 714 | mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose: 715 | 716 | generate_short_mediapipe(input_file, output_file, index, face_mode, project_folder, final_folder, face_detection, face_mesh, pose, detection_period=detection_period) 717 | # We don't easily know detected mode here without return, assuming '1' or '2' based on last frame? 718 | # Ideally function should return as well. 719 | detected_mode = "1" # Placeholder, user didn't complain about stats. 720 | # detected_mode = str(mp_num_faces) # Error fix: mp_num_faces not defined 721 | if face_mode == "2": 722 | detected_mode = "2" 723 | success = True 724 | except Exception as e: 725 | print(f"MediaPipe processing failed (fallback): {e}") 726 | 727 | # 3. Try Haar if others failed 728 | if not success and (use_haar or (not mediapipe_working and not insightface_working)): 729 | try: 730 | print("Attempts with Haar Cascade...") 731 | generate_short_haar(input_file, output_file, index, project_folder, final_folder, detection_period=detection_period) 732 | success = True 733 | except Exception as e2: 734 | print(f"Haar fallback also failed: {e2}") 735 | 736 | # 4. 
Last Resort: Center Crop 737 | if not success: 738 | generate_short_fallback(input_file, output_file, index, project_folder, final_folder) 739 | detected_mode = "1" 740 | 741 | # Save mode 742 | face_modes_log[f"output{str(index).zfill(3)}"] = detected_mode 743 | 744 | else: 745 | if index == 0: 746 | print(f"No files found in {cuts_folder}.") 747 | break 748 | index += 1 749 | 750 | # Save Face Modes to JSON for subtitle usage 751 | modes_file = os.path.join(project_folder, "face_modes.json") 752 | try: 753 | import json 754 | with open(modes_file, "w") as f: 755 | json.dump(face_modes_log, f) 756 | print(f"Detect Stats saved: {modes_file}") 757 | except Exception as e: 758 | print(f"Error saving face modes: {e}") 759 | 760 | if __name__ == "__main__": 761 | edit() -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 
49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. 
"Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 
174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | --------------------------------------------------------------------------------