├── .gitignore ├── CHANGELOG.md ├── README.md ├── all_pdf.sh ├── all_translate.sh ├── convert_png_to_pdf.py ├── download_video.py ├── font.py ├── main.py ├── silmilar.py ├── subtitle.py ├── transcript.py ├── translate_srt.py ├── translate_txt.py ├── video2slide.py ├── video_to_images.py ├── video_to_srt.py ├── you_dt.py └── youtube2slide.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pdf 2 | *.png 3 | .DS_Store 4 | *.mp4 5 | *.csv 6 | *.srt 7 | __pycache__/ 8 | *.tsv 9 | *.json 10 | *.txt 11 | *.vtt 12 | .specstory 13 | *.jpg 14 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | * 7/17 下載字幕改用 `whisper-ctranslate2` 下載英文字木 2 | * 7/17 合成字幕改為字幕的中間點 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video2PDF 2 | 3 | 適合將任何線上影片、課程壓制成一格一格播放的 PDF 4 | 5 | ## pre-Installation 6 | 7 | 需要先安裝 `moviepy` 8 | ``` 9 | pip install moviepy 10 | ``` 11 | 12 | ### 執行 13 | #### 功能 1. 壓制 14 | `python main.py xxx.mp4` 15 | 16 | * 需要兩個檔案 mp4, srt (不管有沒有內嵌字幕檔,都需要 srt 當時間參考點) 17 | * srt 必須是 `xxx.zh.srt` 18 | * 將同名的 mp4 與同名的 srt 放在一起,執行 `python main.py xxx.mp4` 等待一定時間即會產生 pdf 19 | * 如果影片已經有預設 srt 不需 srt 壓制進去只需要檔參考點,請用 `python main.py xxx.mp4 --embed` 20 | 21 | #### 功能 2. 下載 Youtube 影片並下載字幕、翻譯 22 | 23 | `python you_dt.py [youtube_url]` 24 | 25 | #### 功能 3. 純翻譯字幕 26 | 27 | `python translate_srt.py xxxx.srt` 28 | 29 | 30 | 31 | ### 注意事項 32 | 33 | * 檔案太大會遇到同時開啟個數限制 34 | * 先執行 `ulimit -n 4096` 可以解決 35 | 36 | ### 不喜歡指定字體可以換 37 | 38 | 執行 `python font.py` 察看你有哪些 font 可以用 39 | 40 | ### 推薦工具 41 | 42 | * 下載工具:yt-dlp 43 | * 下載字幕工具:YouTube™ 雙字幕 https://chrome.google.com/webstore/detail/youtube-dual-subtitles/hkbdddpiemdeibjoknnofflfgbgnebcm?hl=zh-TW 44 | * 聽譯字幕工具:https://goodsnooze.gumroad.com/l/macwhisper 45 | * 翻譯字幕工具:https://translatesubtitles.co/ 46 | 47 | 48 | ### TODO 49 | 50 | - [x] 雙語字幕 51 | - [x] 多核 CPU 平行處理 52 | - [ ] Streamlit UI 介面 53 | - [ ] 向量檢索 54 | - [ ] PDF searchable 55 | -------------------------------------------------------------------------------- /all_pdf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # get list of mp4 files 4 | for i in *.mp4 5 | do 6 | # check if corresponding pdf exists 7 | pdf_name="${i%.*}.pdf" 8 | if [ ! -f "$pdf_name" ] 9 | then 10 | # pdf doesn't exist, print the file name 11 | echo "Processing file: $i" 12 | 13 | # run python script 14 | python main.py "$i" 15 | fi 16 | done 17 | -------------------------------------------------------------------------------- /all_translate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # get list of srt files 4 | for i in *.srt 5 | do 6 | # Get the base name without extension 7 | base_name=$(basename "$i" .srt) 8 | 9 | # Check if this is already a .zh.srt file, if so, skip it 10 | if [[ $base_name == *.zh ]]; then 11 | continue 12 | fi 13 | 14 | # check if corresponding .zh.srt exists 15 | zh_srt_name="${base_name}.zh.srt" 16 | if [ ! -f "$zh_srt_name" ] 17 | then 18 | # .zh.srt doesn't exist, print the file name 19 | echo "Processing file: $i" 20 | 21 | # run python script 22 | python3 translate_srt.py "$i" 23 | fi 24 | done 25 | -------------------------------------------------------------------------------- /convert_png_to_pdf.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | import shutil 4 | 5 | def convert_png_to_pdf(input_directory, output_filename, post_fix_filename="_text"): 6 | output_file = output_filename + post_fix_filename + '.pdf' 7 | 8 | # Collect all images 9 | images = [] 10 | 11 | # Go through each image in the directory 12 | for filename in sorted(os.listdir(input_directory)): 13 | if filename.endswith((".png", ".jpg", ".jpeg")): # Add support for jpg/jpeg files 14 | # Open the image file 15 | img = Image.open(os.path.join(input_directory, filename)) 16 | # If image is not RGB, convert it to RGB 17 | if img.mode != "RGB": 18 | img = img.convert("RGB") 19 | images.append(img) 20 | 21 | # Save all images to a single PDF file 22 | if images: 23 | images[0].save(output_file, "PDF", resolution=100.0, save_all=True, append_images=images[1:]) 24 | print(f"PDF saved as: {output_file}") 25 | else: 26 | print(f"No images found in {input_directory}") 27 | 28 | # Delete the directory 29 | try: 30 | shutil.rmtree(input_directory) 31 | print("Directory deleted successfully.") 32 | except OSError as e: 33 | print("Error: %s : %s" % (input_directory, e.strerror)) 34 | 35 | # To use the function: 36 | # convert_png_to_pdf("/path/to/your/png/directory", "output_filename") 37 | -------------------------------------------------------------------------------- /download_video.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | import multiprocessing 4 | from urllib.parse import urlparse, parse_qs, urlunparse 5 | from youtube_transcript_api import YouTubeTranscriptApi 6 | from youtube_transcript_api.formatters import SRTFormatter 7 | 8 | def download_video(url): 9 | video_format = "bestvideo[height<=1080][ext=mp4]" # This will download mp4 video in 640x360 resolution 10 | output_template = "%(title)s.%(ext)s" # This will name the video file as "title.mp4" 11 | 12 | # Build the yt-dlp command 13 | command = [ 14 | "yt-dlp", 15 | "-f", video_format, 16 | "-o", output_template, 17 | url, 18 | ] 19 | 20 | # Execute the yt-dlp command 21 | subprocess.run(command, check=True) 22 | 23 | # Get the video title 24 | video_title = subprocess.check_output(["yt-dlp", "--get-filename", "-o", "%(title)s", url]) 25 | video_title = video_title.decode("utf-8").strip() 26 | video_filename = f"{video_title}.mp4" 27 | 28 | print(video_filename) 29 | # Call the generate_srt function 30 | 31 | 32 | 33 | return video_filename 34 | 35 | def clean_url(url): 36 | # 解析 URL 37 | parsed_url = urlparse(url) 38 | # 清理查询字符串参数 39 | query = parse_qs(parsed_url.query) 40 | # 重建 URL 41 | cleaned_url = urlunparse(( 42 | parsed_url.scheme, 43 | parsed_url.netloc, 44 | parsed_url.path, 45 | parsed_url.params, 46 | "&".join(["{}={}".format(k, v[0]) for k, v in query.items()]), 47 | parsed_url.fragment 48 | )) 49 | return cleaned_url 50 | 51 | def old_generate_srt(video_filename): 52 | # Generate srt using whisper-ctranslate2 53 | num_cores = multiprocessing.cpu_count() 54 | print("This notebook has access to {} cores".format(num_cores)) 55 | command_srt = [ 56 | "whisper-ctranslate2", 57 | "--threads", str(num_cores) , 58 | "--output_format", "srt", 59 | video_filename, 60 | ] 61 | 62 | # Execute the whisper-ctranslate2 command 63 | subprocess.run(command_srt, check=True) 64 | 65 | def generate_srt(video_filename, video_url): 66 | cleaned_url = clean_url(video_url) 67 | command = ["yt-dlp", "--print", "id", cleaned_url] 68 | 69 | # Execute the command and capture the output 70 | process = subprocess.run(command, capture_output=True, text=True) 71 | # Extract the video ID from the output 72 | video_id = process.stdout.strip() 73 | transcript =YouTubeTranscriptApi.get_transcript(video_id) 74 | 75 | formatter = SRTFormatter() 76 | srt_formatted = formatter.format_transcript(transcript) 77 | 78 | en_srt_name = video_filename.replace("mp4", "srt") 79 | 80 | with open(en_srt_name, 'w', encoding='utf-8') as srt_file: 81 | srt_file.write(srt_formatted) 82 | -------------------------------------------------------------------------------- /font.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from moviepy.editor import * 4 | from moviepy.editor import TextClip 5 | 6 | print(TextClip.list("font")) 7 | print(TextClip.list('color')) 8 | # 列出你可以使用哪些 Font 9 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from transcript import process_video_subs 3 | from convert_png_to_pdf import convert_png_to_pdf 4 | from video_to_images import video_to_images 5 | import os 6 | import argparse 7 | import time # Add time module 8 | 9 | def main(): 10 | start_time = time.time() # Start timing 11 | 12 | # Set up argument parser 13 | parser = argparse.ArgumentParser(description="Process video and subtitles.") 14 | parser.add_argument('video_name', help="Name of the video file to process.") 15 | parser.add_argument('--embed', action='store_true', help="Use embedded subtitles.") 16 | 17 | # Parse arguments 18 | args = parser.parse_args() 19 | 20 | # Determine base name 21 | base_name = os.path.splitext(args.video_name)[0] 22 | 23 | # Process video subs and create screenshots 24 | if args.embed: 25 | process_video_subs(args.video_name) # with pre-embedd-sub 26 | else: 27 | video_to_images(args.video_name) # without pre-embedd-sub 28 | 29 | # Convert screenshots to PDF 30 | convert_png_to_pdf(base_name, base_name) 31 | 32 | # Calculate and print execution time 33 | end_time = time.time() 34 | duration = end_time - start_time 35 | print(f"\nTotal execution time: {duration:.2f} seconds ({duration/60:.2f} minutes)") 36 | 37 | os.system(f"open '{base_name}_text.pdf'") 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /silmilar.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import imagehash 3 | import os 4 | 5 | def remove_duplicate_images(image_folder, hash_size=12, sim_threshold=10): 6 | """ 7 | Remove duplicate images in a folder based on image hashing. 8 | 9 | :param image_folder: Folder containing images to be checked. 10 | :param hash_size: Size of the hash, defaults to 8. 11 | :param sim_threshold: Similarity threshold for considering images as duplicates, defaults to 5. 12 | """ 13 | hashes = {} 14 | duplicates = [] 15 | 16 | for image_filename in os.listdir(image_folder): 17 | if image_filename.endswith(('.png', '.jpg', '.jpeg')): 18 | image_path = os.path.join(image_folder, image_filename) 19 | try: 20 | # Create a hash for each image 21 | with Image.open(image_path) as img: 22 | temp_hash = imagehash.average_hash(img, hash_size) 23 | 24 | # Check if the hash already exists in the dictionary 25 | if temp_hash in hashes: 26 | print(f"Duplicate found: {image_filename} is a duplicate of {hashes[temp_hash]}") 27 | duplicates.append(image_path) 28 | else: 29 | hashes[temp_hash] = image_filename 30 | 31 | except Exception as e: 32 | print(f"Error processing {image_filename}: {e}") 33 | 34 | # Optionally, remove the identified duplicate images 35 | for duplicate in duplicates: 36 | os.remove(duplicate) 37 | #print(f"Removed duplicate image: {duplicate}") 38 | -------------------------------------------------------------------------------- /subtitle.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from moviepy.editor import * 3 | 4 | # 從命令行獲取MP4檔案名稱 5 | video_file_name = sys.argv[1] 6 | # 由MP4檔案名稱產生對應的SRT檔案名稱 7 | subtitle_file_name = video_file_name.rsplit('.', 1)[0] + '.srt' 8 | 9 | # Load your video 10 | clip = VideoFileClip(video_file_name) 11 | 12 | # Load your subtitles 13 | with open(subtitle_file_name, "r") as f: 14 | subtitles = f.read() 15 | 16 | # Split subtitles into list 17 | subtitles = subtitles.split("\n\n") 18 | 19 | # Process each subtitle 20 | subs = [] 21 | for subtitle in subtitles: 22 | # Split by newline 23 | parts = subtitle.split("\n") 24 | if len(parts) >= 3: 25 | # Get start and end times 26 | times = parts[1].split(" --> ") 27 | start_time = times[0].split(":") 28 | end_time = times[1].split(":") 29 | start_time = int(start_time[0])*3600 + int(start_time[1])*60 + float(start_time[2].replace(",", ".")) 30 | end_time = int(end_time[0])*3600 + int(end_time[1])*60 + float(end_time[2].replace(",", ".")) 31 | # Get text 32 | text = " ".join(parts[2:]) 33 | # Create text clip 34 | text_clip = TextClip(text, fontsize=24, color='white').set_pos(('center', 'bottom')).set_duration(end_time - start_time).set_start(start_time) 35 | 36 | subs.append(text_clip) 37 | 38 | # Overlay subtitles on video 39 | final = CompositeVideoClip([clip] + subs) 40 | 41 | # Write the result to a file 42 | final.write_videofile(video_file_name.rsplit('.', 1)[0] + '_with_subs.mp4') 43 | -------------------------------------------------------------------------------- /transcript.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import subprocess 3 | import pandas as pd 4 | import os 5 | from datetime import timedelta 6 | import pysrt 7 | import sys 8 | from tqdm import tqdm 9 | 10 | def detect_hardware_acceleration(): 11 | try: 12 | # 尝试运行 ffmpeg 命令来列出支持的硬件加速方式 13 | result = subprocess.run(['ffmpeg', '-hwaccels'], capture_output=True, text=True) 14 | output = result.stdout 15 | 16 | # 检测 cuda 和 mps 支持 17 | if 'cuda' in output: 18 | return 'cuda' 19 | elif 'mps' in output: 20 | return 'mps' 21 | except Exception as e: 22 | print(f"检测硬件加速时出错: {e}") 23 | 24 | # 如果无法检测到支持的硬件加速,返回 None 25 | return None 26 | 27 | def process_video_subs(video_path): 28 | base_name = os.path.splitext(video_path)[0] 29 | os.makedirs(base_name, exist_ok=True) 30 | 31 | srt_path = f'{base_name}.srt' 32 | if not os.path.exists(srt_path): 33 | print(f'找不到字幕文件: {srt_path}') 34 | return 35 | 36 | subs = pysrt.open(srt_path) 37 | 38 | hw_accel = detect_hardware_acceleration() 39 | 40 | # 使用 tqdm 包裹 subs 41 | for i, sub in tqdm(enumerate(subs), total=len(subs), desc="处理中"): 42 | start = timedelta(hours=sub.start.hours, minutes=sub.start.minutes, seconds=sub.start.seconds, milliseconds=sub.start.milliseconds) 43 | end = timedelta(hours=sub.end.hours, minutes=sub.end.minutes, seconds=sub.end.seconds, milliseconds=sub.end.milliseconds) 44 | mid_time = start + (end - start) / 2 45 | 46 | hours, remainder = divmod(mid_time.seconds, 3600) 47 | minutes, seconds = divmod(remainder, 60) 48 | timestamp = '{:02}:{:02}:{:02}.{:03}'.format(hours, minutes, seconds, mid_time.microseconds // 1000) 49 | 50 | screenshot_filename = os.path.join(base_name, f'{i+1:04}.png') 51 | 52 | ffmpeg_cmd = ['ffmpeg', '-loglevel', 'error', '-ss', timestamp, '-i', video_path, '-vframes', '1', '-pix_fmt', 'yuv420p', screenshot_filename] 53 | 54 | if hw_accel: 55 | ffmpeg_cmd.insert(1, '-hwaccel') 56 | ffmpeg_cmd.insert(2, hw_accel) 57 | 58 | subprocess.run(ffmpeg_cmd) 59 | -------------------------------------------------------------------------------- /translate_srt.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pysrt 3 | import concurrent.futures 4 | import os 5 | from tqdm import tqdm 6 | import sys 7 | import chardet 8 | # Your DeepL API key from the environment variable 9 | api_key = os.getenv('DEEPL_API_KEY') 10 | 11 | def translate_text(text, target_language='zh'): 12 | text = text.replace("\h", " ") 13 | base_url = 'https://api.deepl.com/v2/translate' 14 | payload = { 15 | 'auth_key': api_key, 16 | 'text': text, 17 | 'target_lang': target_language, 18 | } 19 | response = requests.post(base_url, data=payload) 20 | if response.status_code != 200: 21 | raise Exception('DeepL request failed with status code {}'.format(response.status_code)) 22 | translated_text = response.json()['translations'][0]['text'] 23 | return translated_text 24 | 25 | def translate_srt_file(file_path, target_language='zh'): 26 | # Load the .srt file 27 | 28 | with open(file_path, 'rb') as f: 29 | result = chardet.detect(f.read()) 30 | 31 | subs = pysrt.open(file_path, encoding=result['encoding']) 32 | 33 | # Translate each subtitle 34 | with concurrent.futures.ThreadPoolExecutor() as executor: 35 | future_to_sub = {executor.submit(translate_text, sub.text, target_language): sub for sub in subs} 36 | for future in tqdm(concurrent.futures.as_completed(future_to_sub), total=len(subs), desc='Translating subtitles'): 37 | sub = future_to_sub[future] 38 | try: 39 | translated_text = future.result() 40 | sub.text = translated_text 41 | except Exception as exc: 42 | print('%r generated an exception: %s' % (sub, exc)) 43 | 44 | # Save the translated .srt file 45 | subs.save(file_path.replace('.srt', '.zh.srt'), encoding='utf-8') 46 | 47 | 48 | if __name__ == "__main__": 49 | if len(sys.argv) != 2: 50 | print(f"Usage: {sys.argv[0]} filename") 51 | sys.exit(1) 52 | 53 | translate_srt_file(sys.argv[1]) 54 | -------------------------------------------------------------------------------- /translate_txt.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | from concurrent.futures import ThreadPoolExecutor 4 | from tqdm import tqdm 5 | import sys 6 | # Your DeepL API key from the environment variable 7 | api_key = os.getenv('DEEPL_API_KEY') 8 | 9 | def translate_text(args): 10 | text, target_language = args 11 | text = text.replace("\h", " ") 12 | base_url = 'https://api.deepl.com/v2/translate' 13 | payload = { 14 | 'auth_key': api_key, 15 | 'text': text, 16 | 'target_lang': target_language, 17 | } 18 | response = requests.post(base_url, data=payload) 19 | if response.status_code != 200: 20 | print(f'DeepL request failed with status code {response.status_code} for text: {text}') 21 | return text 22 | translated_text = response.json()['translations'][0]['text'] 23 | return translated_text 24 | 25 | def parallel_translation(file_path, target_language='zh', num_workers=5): 26 | with open(file_path, 'r', encoding='utf-8') as file: 27 | content = file.read() 28 | 29 | sentences = content.split('.') 30 | with ThreadPoolExecutor(max_workers=num_workers) as executor: 31 | translated_sentences = list(tqdm(executor.map(translate_text, [(sentence, target_language) for sentence in sentences]), total=len(sentences))) 32 | 33 | translated_text = '\n'.join(translated_sentences) 34 | with open('translated_'+file_path, 'w', encoding='utf-8') as file: 35 | print(translated_text) 36 | file.write(translated_text) 37 | 38 | if __name__ == "__main__": 39 | if len(sys.argv) != 2: 40 | print(f"Usage: {sys.argv[0]} filename") 41 | sys.exit(1) 42 | 43 | parallel_translation(sys.argv[1]) 44 | -------------------------------------------------------------------------------- /video2slide.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from download_video import download_video 4 | from download_video import generate_srt 5 | from download_video import old_generate_srt 6 | from transcript import process_video_subs 7 | from video_to_images import video_to_images 8 | from convert_png_to_pdf import convert_png_to_pdf 9 | from silmilar import remove_duplicate_images 10 | os.environ['SDL_AUDIODRIVER'] = 'dummy' 11 | 12 | if __name__ == "__main__": 13 | if len(sys.argv) != 2: 14 | print(f"Usage: {sys.argv[0]} [file name]") 15 | sys.exit(1) 16 | 17 | # 获取命令行提供的文件名 18 | file_name = sys.argv[1] 19 | 20 | # 处理字幕和创建截图 21 | process_video_subs(file_name) # with pre-embedded-sub 22 | base_name = os.path.splitext(file_name)[0] # 从文件名中去掉扩展名 23 | 24 | # 移除重复图片 25 | remove_duplicate_images(base_name) 26 | 27 | # 将截图转换为PDF 28 | convert_png_to_pdf(base_name, base_name, "_slide") 29 | -------------------------------------------------------------------------------- /video_to_images.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import json 4 | import cv2 5 | import numpy as np 6 | from PIL import Image, ImageDraw, ImageFont 7 | from concurrent.futures import ThreadPoolExecutor 8 | from tqdm import tqdm 9 | import time # 添加 time 模組 10 | 11 | def get_video_dimensions(video_path): 12 | cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", 13 | "-show_entries", "stream=width,height,sample_aspect_ratio", "-of", "json", video_path] 14 | info = json.loads(subprocess.check_output(cmd).decode()) 15 | stream = info['streams'][0] 16 | w, h = stream['width'], stream['height'] 17 | sar = stream.get('sample_aspect_ratio', '1:1').split(':') 18 | return int(w * float(sar[0])/float(sar[1])), h 19 | 20 | def parse_subtitle(entry): 21 | lines = entry.strip().split('\n') 22 | if len(lines) < 3: return None, None, None 23 | start = sum(float(x)*60**i for i,x in enumerate(reversed(lines[1].split(' --> ')[0].replace(',','.').split(':')))) 24 | end = sum(float(x)*60**i for i,x in enumerate(reversed(lines[1].split(' --> ')[1].replace(',','.').split(':')))) 25 | return start, end, ' '.join(lines[2:]) 26 | 27 | def init_process(font_path, font_size): 28 | global font 29 | font = ImageFont.truetype(font_path, font_size) 30 | 31 | def process_task(args): 32 | i, start, text, out_dir, width, height, y = args 33 | cap = cv2.VideoCapture(video_path_global) 34 | if not cap.isOpened(): 35 | print(f"Error: Could not open video file for frame {i}") 36 | return i 37 | 38 | cap.set(cv2.CAP_PROP_POS_MSEC, start*1000) 39 | ret, frame = cap.read() 40 | 41 | if not ret or frame is None: 42 | print(f"Error: Could not read frame {i} at time {start} seconds") 43 | cap.release() 44 | return i 45 | 46 | try: 47 | img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) 48 | draw = ImageDraw.Draw(img) 49 | 50 | # 獲取字體度量 51 | ascent, descent = font.getmetrics() 52 | text_vertical_span = ascent + descent 53 | 54 | # 計算文字寬度 55 | text_width = draw.textlength(text, font=font) 56 | 57 | # 計算背景矩形參數 58 | padding = 5 59 | rect_left = (width - text_width) // 2 - padding 60 | rect_top = y - ascent - padding 61 | rect_right = rect_left + text_width + padding * 2 62 | rect_bottom = rect_top + text_vertical_span + padding * 2 63 | 64 | # 繪製背景 65 | draw.rectangle([(rect_left, rect_top), (rect_right, rect_bottom)], fill='black') 66 | 67 | # 計算文字位置(水平居中,垂直居中) 68 | text_x = (width - text_width) // 2 69 | text_y = rect_top + padding 70 | 71 | # 繪製文字 72 | draw.text((text_x, text_y), text, fill=(255,255,0), font=font) 73 | 74 | img.save(f"{out_dir}/{i:04d}.jpg", quality=95) 75 | except Exception as e: 76 | print(f"Error processing frame {i}: {str(e)}") 77 | finally: 78 | cap.release() 79 | return i 80 | 81 | def video_to_images(video_path): 82 | start_time = time.time() # 開始計時 83 | 84 | global video_path_global 85 | video_path_global = video_path 86 | base = os.path.splitext(video_path)[0] 87 | os.makedirs(base, exist_ok=True) 88 | 89 | with open(f"{base}.zh.srt", 'r', encoding='utf-8') as f: 90 | subs = [parse_subtitle(entry) for entry in f.read().split('\n\n') if entry.strip()] 91 | 92 | w, h = get_video_dimensions(video_path) 93 | scale = max(w, h) / 1920 94 | font_size = int(50 * scale) 95 | y_pos = h - 100 if w > 640 else h - 30 96 | 97 | # Try different font paths 98 | possible_font_paths = [ 99 | "Noto-Sans-CJK-SC.ttf", 100 | "/System/Library/Fonts/PingFang.ttc", # macOS default Chinese font 101 | "/System/Library/Fonts/Arial Unicode.ttf", # Another common option 102 | "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc" # Linux path 103 | ] 104 | 105 | font_path = None 106 | for path in possible_font_paths: 107 | if os.path.exists(path): 108 | font_path = path 109 | break 110 | 111 | if font_path is None: 112 | raise FileNotFoundError("Could not find a suitable font. Please install Noto Sans CJK or specify a valid font path.") 113 | 114 | tasks = [(i, s, t, base, w, h, y_pos) for i, (s, e, t) in enumerate(subs) if t] 115 | 116 | with ThreadPoolExecutor(initializer=init_process, initargs=(font_path, font_size), 117 | max_workers=os.cpu_count()*2) as executor: 118 | list(tqdm(executor.map(process_task, tasks), total=len(tasks))) 119 | 120 | end_time = time.time() # 結束計時 121 | duration = end_time - start_time 122 | print(f"\n處理完成!") 123 | print(f"總共處理了 {len(tasks)} 張圖片") 124 | print(f"總耗時: {duration:.2f} 秒") 125 | print(f"平均每張圖片處理時間: {duration/len(tasks):.2f} 秒") -------------------------------------------------------------------------------- /video_to_srt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import math 4 | import subprocess 5 | import json 6 | import tempfile 7 | from datetime import datetime, timedelta 8 | from pathlib import Path 9 | import logging 10 | from groq import Groq 11 | 12 | # 设置日志 13 | logging.basicConfig(level=logging.INFO) 14 | logger = logging.getLogger(__name__) 15 | 16 | def format_timestamp(seconds): 17 | """将秒数转换为 SRT 时间格式""" 18 | hours = int(seconds // 3600) 19 | minutes = int((seconds % 3600) // 60) 20 | secs = int(seconds % 60) 21 | msecs = int((seconds - int(seconds)) * 1000) 22 | return f"{hours:02d}:{minutes:02d}:{secs:02d},{msecs:03d}" 23 | 24 | def get_video_duration(filename): 25 | """使用 ffprobe 获取视频时长(秒)""" 26 | cmd = [ 27 | 'ffprobe', 28 | '-v', 'quiet', 29 | '-print_format', 'json', 30 | '-show_format', 31 | '-show_streams', 32 | str(filename) 33 | ] 34 | try: 35 | result = subprocess.run(cmd, capture_output=True, text=True) 36 | data = json.loads(result.stdout) 37 | return float(data['format']['duration']) 38 | except Exception as e: 39 | logger.error(f"获取视频时长失败: {str(e)}") 40 | return None 41 | 42 | def split_video(input_file, segment_length=600): 43 | """将视频分割成指定长度的片段""" 44 | duration = get_video_duration(input_file) 45 | if not duration: 46 | return None 47 | 48 | segments = [] 49 | temp_dir = tempfile.mkdtemp() 50 | 51 | for i in range(0, int(duration), segment_length): 52 | output_file = Path(temp_dir) / f"segment_{i:04d}.m4a" 53 | cmd = [ 54 | 'ffmpeg', 55 | '-i', str(input_file), 56 | '-ss', str(i), 57 | '-t', str(segment_length), 58 | '-vn', 59 | '-acodec', 'copy', 60 | str(output_file), 61 | '-y' 62 | ] 63 | try: 64 | subprocess.run(cmd, check=True, capture_output=True) 65 | segments.append({ 66 | 'file': output_file, 67 | 'start_time': i 68 | }) 69 | except subprocess.CalledProcessError as e: 70 | logger.error(f"分割视频失败: {str(e)}") 71 | return None 72 | 73 | return segments 74 | 75 | def transcribe_segment(client, segment): 76 | """转录单个视频片段""" 77 | try: 78 | logger.info(f"开始转录片段: {segment['file']}") 79 | with open(segment['file'], "rb") as file: 80 | transcription = client.audio.transcriptions.create( 81 | file=(str(segment['file']), file.read()), 82 | model="whisper-large-v3", 83 | response_format="verbose_json", 84 | prompt="这是中文字幕" 85 | ) 86 | logger.info(f"成功转录片段: {segment['file']}") 87 | return transcription.segments 88 | except Exception as e: 89 | logger.error(f"转录片段失败 {segment['file']}: {str(e)}") 90 | return None 91 | 92 | def create_srt(segments, start_time_offset=0): 93 | """将转录结果转换为 SRT 格式""" 94 | srt_content = [] 95 | counter = 1 96 | 97 | for segment in segments: 98 | start_time = start_time_offset + float(segment['start']) 99 | end_time = start_time_offset + float(segment['end']) 100 | text = segment['text'].strip() 101 | 102 | srt_content.append(f"{counter}\n{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n{text}\n") 103 | counter += 1 104 | 105 | return "\n".join(srt_content) 106 | 107 | def create_txt(segments): 108 | """将转录结果转换为纯文本格式""" 109 | return "\n".join(segment['text'].strip() for segment in segments) 110 | 111 | def check_environment(): 112 | api_key = os.getenv('GROQ_API_KEY') 113 | if not api_key: 114 | print("错误:未找到 GROQ_API_KEY 环境变量") 115 | print("请确保您已经设置了环境变量,可以通过以下方式之一设置:") 116 | print("1. 在终端执行: export GROQ_API_KEY='您的API密钥'") 117 | print("2. 在 .zshrc 或 .bashrc 中添加后,执行 source ~/.zshrc 或 source ~/.bashrc") 118 | sys.exit(1) 119 | return api_key 120 | 121 | def main(): 122 | if len(sys.argv) != 2: 123 | print("用法: python video_to_srt.py <输入视频文件>") 124 | sys.exit(1) 125 | 126 | api_key = check_environment() 127 | input_file = Path(sys.argv[1]) 128 | if not input_file.exists(): 129 | logger.error(f"Input file not found: {input_file}") 130 | sys.exit(1) 131 | 132 | try: 133 | client = Groq(api_key=api_key) 134 | logger.info("Groq client initialized successfully") 135 | 136 | # 分割视频 137 | logger.info("开始分割视频...") 138 | segments = split_video(input_file) 139 | if not segments: 140 | raise Exception("视频分割失败") 141 | 142 | # 转录每个片段 143 | logger.info("开始转录片段...") 144 | final_srt = "" 145 | all_transcriptions = [] # 存储所有转录结果 146 | 147 | for segment in segments: 148 | transcription = transcribe_segment(client, segment) 149 | if not transcription: 150 | raise Exception(f"转录失败: {segment['file']}") 151 | 152 | srt_content = create_srt(transcription, segment['start_time']) 153 | final_srt += srt_content + "\n" 154 | all_transcriptions.extend(transcription) # 收集所有转录结果 155 | 156 | # 删除临时文件 157 | os.unlink(segment['file']) 158 | 159 | # 保存SRT文件(中文) 160 | srt_file = input_file.with_suffix('.zh.srt') 161 | with open(srt_file, 'w', encoding='utf-8') as f: 162 | f.write(final_srt) 163 | logger.info(f"字幕文件已保存为: {srt_file}") 164 | 165 | # 保存纯文本文件 166 | txt_file = input_file.with_suffix('.txt') 167 | with open(txt_file, 'w', encoding='utf-8') as f: 168 | f.write(create_txt(all_transcriptions)) 169 | logger.info(f"文本文件已保存为: {txt_file}") 170 | 171 | except Exception as e: 172 | logger.error(f"处理失败: {str(e)}") 173 | sys.exit(1) 174 | 175 | if __name__ == "__main__": 176 | main() -------------------------------------------------------------------------------- /you_dt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from download_video import download_video 4 | from download_video import generate_srt 5 | from translate_srt import translate_srt_file 6 | from video_to_images import video_to_images 7 | from convert_png_to_pdf import convert_png_to_pdf 8 | os.environ['SDL_AUDIODRIVER'] = 'dummy' 9 | 10 | 11 | if __name__ == "__main__": 12 | if len(sys.argv) != 2: 13 | print(f"Usage: {sys.argv[0]} [youtube url]") 14 | sys.exit(1) 15 | 16 | # 擷取影片和字幕檔案 17 | file_name = download_video(sys.argv[1]) 18 | generate_srt(file_name,sys.argv[1]) 19 | # xxx.en.srt 20 | en_srt_name = file_name.replace("mp4", "srt") 21 | translate_srt_file(en_srt_name) 22 | # 取得影片檔案名稱和基本名稱 23 | 24 | video_file_name = file_name 25 | # xxx.en.srt => xxx.mp4 26 | 27 | base_name = os.path.splitext(video_file_name)[0] 28 | # xxx.mp4 => xxx 29 | 30 | # 將影片轉換為圖片 31 | video_to_images(video_file_name) 32 | 33 | # 轉換圖片為PDF 34 | convert_png_to_pdf(base_name, base_name) 35 | -------------------------------------------------------------------------------- /youtube2slide.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from download_video import download_video 4 | from download_video import generate_srt 5 | from download_video import old_generate_srt 6 | from transcript import process_video_subs 7 | from video_to_images import video_to_images 8 | from convert_png_to_pdf import convert_png_to_pdf 9 | from silmilar import remove_duplicate_images 10 | os.environ['SDL_AUDIODRIVER'] = 'dummy' 11 | 12 | 13 | if __name__ == "__main__": 14 | if len(sys.argv) != 2: 15 | print(f"Usage: {sys.argv[0]} [youtube url]") 16 | sys.exit(1) 17 | 18 | # 擷取影片和字幕檔案 19 | file_name = download_video(sys.argv[1]) 20 | generate_srt(file_name,sys.argv[1]) 21 | 22 | # 取得影片檔案名稱和基本名稱 23 | 24 | video_file_name = file_name 25 | # xxx.en.srt => xxx.mp4 26 | 27 | base_name = os.path.splitext(video_file_name)[0] 28 | # xxx.mp4 => xxx 29 | 30 | # 將影片轉換為圖片 31 | # Process video subs and create screenshots 32 | process_video_subs(file_name) # with pre-embedd-sub 33 | remove_duplicate_images(base_name) 34 | # Convert screenshots to PDF 35 | convert_png_to_pdf(base_name, base_name, "_slide") 36 | --------------------------------------------------------------------------------