├── .gitignore
├── CHANGELOG.md
├── README.md
├── all_pdf.sh
├── all_translate.sh
├── convert_png_to_pdf.py
├── download_video.py
├── font.py
├── main.py
├── silmilar.py
├── subtitle.py
├── transcript.py
├── translate_srt.py
├── translate_txt.py
├── video2slide.py
├── video_to_images.py
├── video_to_srt.py
├── you_dt.py
└── youtube2slide.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pdf
 2 | *.png
 3 | .DS_Store
 4 | *.mp4
 5 | *.csv
 6 | *.srt
 7 | __pycache__/
 8 | *.tsv
 9 | *.json
10 | *.txt
11 | *.vtt
12 | .specstory
13 | *.jpg
14 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | * 7/17 下載字幕改用 `whisper-ctranslate2` 下載英文字幕
2 | * 7/17 合成字幕改為字幕的中間點
3 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Video2PDF
 2 | 
 3 | 適合將任何線上影片、課程壓制成一格一格播放的 PDF
 4 | 
 5 | ## pre-Installation
 6 | 
 7 | 需要先安裝 `moviepy`
 8 | ```
 9 | pip install moviepy
10 | ```
11 | 
12 | ### 執行
13 | #### 功能 1. 壓制
14 | `python main.py xxx.mp4`
15 | 
16 | * 需要兩個檔案 mp4, srt （不管有沒有內嵌字幕檔，都需要 srt 當時間參考點）
17 | * srt 必須是 `xxx.zh.srt`
18 | * 將同名的 mp4 與同名的 srt 放在一起，執行 `python main.py xxx.mp4` 等待一定時間即會產生 pdf
19 | * 如果影片已經有預設字幕，不需要把 srt 壓制進去、只需要把 srt 當參考點，請用 `python main.py xxx.mp4 --embed`
20 | 
21 | #### 功能 2. 下載 Youtube 影片並下載字幕、翻譯
22 | 
23 | `python you_dt.py [youtube_url]`
24 | 
25 | #### 功能 3. 純翻譯字幕
26 | 
27 | `python translate_srt.py xxxx.srt`
28 | 
29 | 
30 | 
31 | ### 注意事項
32 | 
33 | * 檔案太大會遇到同時開啟個數限制
34 | * 先執行 `ulimit -n 4096` 可以解決
35 | 
36 | ### 不喜歡指定字體可以換
37 | 
38 | 執行 `python font.py` 察看你有哪些 font 可以用
39 | 
40 | ### 推薦工具
41 | 
42 | * 下載工具：yt-dlp
43 | * 下載字幕工具：YouTube™ 雙字幕 https://chrome.google.com/webstore/detail/youtube-dual-subtitles/hkbdddpiemdeibjoknnofflfgbgnebcm?hl=zh-TW
44 | * 聽譯字幕工具：https://goodsnooze.gumroad.com/l/macwhisper
45 | * 翻譯字幕工具：https://translatesubtitles.co/
46 | 
47 | 
48 | ### TODO
49 | 
50 | - [x] 雙語字幕
51 | - [x] 多核 CPU 平行處理
52 | - [ ] Streamlit UI 介面
53 | - [ ] 向量檢索
54 | - [ ] PDF searchable
55 | 


--------------------------------------------------------------------------------
/all_pdf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # get list of mp4 files
 4 | for i in *.mp4
 5 | do
 6 |   # check if corresponding pdf exists
 7 |   pdf_name="${i%.*}.pdf"
 8 |   if [ ! -f "$pdf_name" ]
 9 |   then
10 |     # pdf doesn't exist, print the file name
11 |     echo "Processing file: $i"
12 | 
13 |     # run python script
14 |     python main.py "$i"
15 |   fi
16 | done
17 | 


--------------------------------------------------------------------------------
/all_translate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # get list of srt files
 4 | for i in *.srt
 5 | do
 6 |   # Get the base name without extension
 7 |   base_name=$(basename "$i" .srt)
 8 | 
 9 |   # Check if this is already a .zh.srt file, if so, skip it
10 |   if [[ $base_name == *.zh ]]; then
11 |     continue
12 |   fi
13 | 
14 |   # check if corresponding .zh.srt exists
15 |   zh_srt_name="${base_name}.zh.srt"
16 |   if [ ! -f "$zh_srt_name" ]
17 |   then
18 |     # .zh.srt doesn't exist, print the file name
19 |     echo "Processing file: $i"
20 | 
21 |     # run python script
22 |     python3 translate_srt.py "$i"
23 |   fi
24 | done
25 | 


--------------------------------------------------------------------------------
/convert_png_to_pdf.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from PIL import Image
 3 | import shutil
 4 | 
 5 | def convert_png_to_pdf(input_directory, output_filename, post_fix_filename="_text"):
 6 |     output_file = output_filename + post_fix_filename + '.pdf'
 7 | 
 8 |     # Collect all images
 9 |     images = []
10 | 
11 |     # Go through each image in the directory
12 |     for filename in sorted(os.listdir(input_directory)):
13 |         if filename.endswith((".png", ".jpg", ".jpeg")):  # Add support for jpg/jpeg files
14 |             # Open the image file
15 |             img = Image.open(os.path.join(input_directory, filename))
16 |             # If image is not RGB, convert it to RGB
17 |             if img.mode != "RGB":
18 |                 img = img.convert("RGB")
19 |             images.append(img)
20 | 
21 |     # Save all images to a single PDF file
22 |     if images:
23 |         images[0].save(output_file, "PDF", resolution=100.0, save_all=True, append_images=images[1:])
24 |         print(f"PDF saved as: {output_file}")
25 |     else:
26 |         print(f"No images found in {input_directory}")
27 | 
28 |     # Delete the directory
29 |     try:
30 |         shutil.rmtree(input_directory)
31 |         print("Directory deleted successfully.")
32 |     except OSError as e:
33 |         print("Error: %s : %s" % (input_directory, e.strerror))
34 | 
35 | # To use the function:
36 | # convert_png_to_pdf("/path/to/your/png/directory", "output_filename")
37 | 


--------------------------------------------------------------------------------
/download_video.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | import multiprocessing
 4 | from urllib.parse import urlparse, parse_qs, urlunparse
 5 | from youtube_transcript_api import YouTubeTranscriptApi
 6 | from youtube_transcript_api.formatters import SRTFormatter
 7 | 
 8 | def download_video(url):
 9 |     video_format = "bestvideo[height<=1080][ext=mp4]"  # This will download mp4 video in 640x360 resolution
10 |     output_template = "%(title)s.%(ext)s"  # This will name the video file as "title.mp4"
11 | 
12 |     # Build the yt-dlp command
13 |     command = [
14 |         "yt-dlp",
15 |         "-f", video_format,
16 |         "-o", output_template,
17 |         url,
18 |     ]
19 | 
20 |     # Execute the yt-dlp command
21 |     subprocess.run(command, check=True)
22 | 
23 |     # Get the video title
24 |     video_title = subprocess.check_output(["yt-dlp", "--get-filename", "-o", "%(title)s", url])
25 |     video_title = video_title.decode("utf-8").strip()
26 |     video_filename = f"{video_title}.mp4"
27 | 
28 |     print(video_filename)
29 |     # Call the generate_srt function
30 | 
31 | 
32 | 
33 |     return video_filename
34 | 
def clean_url(url):
    """Return ``url`` with every query parameter reduced to its first value.

    ``parse_qs`` percent-decodes the values, so they are re-encoded with
    ``urlencode`` when the URL is rebuilt; joining the decoded values by hand
    would corrupt any value containing ``&``, ``=`` or other reserved
    characters. Parameters with blank values are dropped by ``parse_qs``.
    """
    from urllib.parse import urlencode

    parsed_url = urlparse(url)
    query = parse_qs(parsed_url.query)
    first_values = {key: values[0] for key, values in query.items()}
    return urlunparse(parsed_url._replace(query=urlencode(first_values)))
50 | 
def old_generate_srt(video_filename):
    """Transcribe ``video_filename`` to SRT by running whisper-ctranslate2.

    The tool is started with one thread per CPU core; a non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    core_count = multiprocessing.cpu_count()
    print("This notebook has access to {} cores".format(core_count))

    subprocess.run(
        [
            "whisper-ctranslate2",
            "--threads", str(core_count),
            "--output_format", "srt",
            video_filename,
        ],
        check=True,
    )
64 | 
def generate_srt(video_filename, video_url):
    """Fetch the YouTube transcript of ``video_url`` and save it as an SRT file.

    The SRT is written next to the video, using the video file name with its
    extension replaced by ``.srt``.

    :param video_filename: File name of the downloaded video.
    :param video_url: URL of the video; its ID is resolved through yt-dlp.
    :raises subprocess.CalledProcessError: if yt-dlp exits with an error.
    :raises ValueError: if yt-dlp printed no video ID.
    """
    cleaned_url = clean_url(video_url)

    # check=True: a failed lookup must not continue with an empty video ID.
    process = subprocess.run(
        ["yt-dlp", "--print", "id", cleaned_url],
        capture_output=True, text=True, check=True,
    )
    printed_ids = process.stdout.split()
    if not printed_ids:
        raise ValueError(f"yt-dlp returned no video id for {cleaned_url}")
    # Use the first ID in case yt-dlp printed more than one line.
    video_id = printed_ids[0]

    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    srt_formatted = SRTFormatter().format_transcript(transcript)

    # Swap only the extension. str.replace("mp4", "srt") would also rewrite
    # "mp4" inside the title and would leave a non-mp4 name unchanged, so the
    # SRT text would overwrite the video itself.
    en_srt_name = os.path.splitext(video_filename)[0] + ".srt"

    with open(en_srt_name, 'w', encoding='utf-8') as srt_file:
        srt_file.write(srt_formatted)
82 | 


--------------------------------------------------------------------------------
/font.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from moviepy.editor import *
4 | from moviepy.editor import TextClip
5 | 
# List the fonts and colors that TextClip can use on this machine.
for listing_kind in ("font", "color"):
    print(TextClip.list(listing_kind))
9 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from transcript import process_video_subs
 3 | from convert_png_to_pdf import convert_png_to_pdf
 4 | from video_to_images import video_to_images
 5 | import os
 6 | import argparse
 7 | import time  # Add time module
 8 | 
 9 | def main():
10 |     start_time = time.time()  # Start timing
11 |     
12 |     # Set up argument parser
13 |     parser = argparse.ArgumentParser(description="Process video and subtitles.")
14 |     parser.add_argument('video_name', help="Name of the video file to process.")
15 |     parser.add_argument('--embed', action='store_true', help="Use embedded subtitles.")
16 | 
17 |     # Parse arguments
18 |     args = parser.parse_args()
19 | 
20 |     # Determine base name
21 |     base_name = os.path.splitext(args.video_name)[0]
22 | 
23 |     # Process video subs and create screenshots
24 |     if args.embed:
25 |         process_video_subs(args.video_name) # with pre-embedd-sub
26 |     else:
27 |         video_to_images(args.video_name) # without pre-embedd-sub
28 | 
29 |     # Convert screenshots to PDF
30 |     convert_png_to_pdf(base_name, base_name)
31 |     
32 |     # Calculate and print execution time
33 |     end_time = time.time()
34 |     duration = end_time - start_time
35 |     print(f"\nTotal execution time: {duration:.2f} seconds ({duration/60:.2f} minutes)")
36 |     
37 |     os.system(f"open '{base_name}_text.pdf'")
38 | 
39 | if __name__ == "__main__":
40 |     main()
41 | 


--------------------------------------------------------------------------------
/silmilar.py:
--------------------------------------------------------------------------------
 1 | from PIL import Image
 2 | import imagehash
 3 | import os
 4 | 
 5 | def remove_duplicate_images(image_folder, hash_size=12, sim_threshold=10):
 6 |     """
 7 |     Remove duplicate images in a folder based on image hashing.
 8 | 
 9 |     :param image_folder: Folder containing images to be checked.
10 |     :param hash_size: Size of the hash, defaults to 8.
11 |     :param sim_threshold: Similarity threshold for considering images as duplicates, defaults to 5.
12 |     """
13 |     hashes = {}
14 |     duplicates = []
15 | 
16 |     for image_filename in os.listdir(image_folder):
17 |         if image_filename.endswith(('.png', '.jpg', '.jpeg')):
18 |             image_path = os.path.join(image_folder, image_filename)
19 |             try:
20 |                 # Create a hash for each image
21 |                 with Image.open(image_path) as img:
22 |                     temp_hash = imagehash.average_hash(img, hash_size)
23 | 
24 |                 # Check if the hash already exists in the dictionary
25 |                 if temp_hash in hashes:
26 |                     print(f"Duplicate found: {image_filename} is a duplicate of {hashes[temp_hash]}")
27 |                     duplicates.append(image_path)
28 |                 else:
29 |                     hashes[temp_hash] = image_filename
30 | 
31 |             except Exception as e:
32 |                 print(f"Error processing {image_filename}: {e}")
33 | 
34 |     # Optionally, remove the identified duplicate images
35 |     for duplicate in duplicates:
36 |         os.remove(duplicate)
37 |         #print(f"Removed duplicate image: {duplicate}")
38 | 


--------------------------------------------------------------------------------
/subtitle.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from moviepy.editor import *
 3 | 
 4 | # 從命令行獲取MP4檔案名稱
 5 | video_file_name = sys.argv[1]
 6 | # 由MP4檔案名稱產生對應的SRT檔案名稱
 7 | subtitle_file_name = video_file_name.rsplit('.', 1)[0] + '.srt'
 8 | 
 9 | # Load your video
10 | clip = VideoFileClip(video_file_name)
11 | 
12 | # Load your subtitles
13 | with open(subtitle_file_name, "r") as f:
14 |     subtitles = f.read()
15 | 
16 | # Split subtitles into list
17 | subtitles = subtitles.split("\n\n")
18 | 
19 | # Process each subtitle
20 | subs = []
21 | for subtitle in subtitles:
22 |     # Split by newline
23 |     parts = subtitle.split("\n")
24 |     if len(parts) >= 3:
25 |         # Get start and end times
26 |         times = parts[1].split(" --> ")
27 |         start_time = times[0].split(":")
28 |         end_time = times[1].split(":")
29 |         start_time = int(start_time[0])*3600 + int(start_time[1])*60 + float(start_time[2].replace(",", "."))
30 |         end_time = int(end_time[0])*3600 + int(end_time[1])*60 + float(end_time[2].replace(",", "."))
31 |         # Get text
32 |         text = " ".join(parts[2:])
33 |         # Create text clip
34 |         text_clip = TextClip(text, fontsize=24, color='white').set_pos(('center', 'bottom')).set_duration(end_time - start_time).set_start(start_time)
35 |         
36 |         subs.append(text_clip)
37 | 
38 | # Overlay subtitles on video
39 | final = CompositeVideoClip([clip] + subs)
40 | 
41 | # Write the result to a file
42 | final.write_videofile(video_file_name.rsplit('.', 1)[0] + '_with_subs.mp4')
43 | 


--------------------------------------------------------------------------------
/transcript.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import subprocess
 3 | import pandas as pd
 4 | import os
 5 | from datetime import timedelta
 6 | import pysrt
 7 | import sys
 8 | from tqdm import tqdm
 9 | 
def detect_hardware_acceleration():
    """Return an ffmpeg ``-hwaccel`` method supported by the local ffmpeg.

    Runs ``ffmpeg -hwaccels`` and returns ``'cuda'`` or ``'videotoolbox'``
    (checked in that order) when ffmpeg lists it, otherwise ``None``. Any
    error while running ffmpeg is printed and also yields ``None``.
    """
    try:
        result = subprocess.run(['ffmpeg', '-hwaccels'], capture_output=True, text=True)
        # ffmpeg prints one method per line; compare whole lines, not substrings.
        available = {line.strip() for line in result.stdout.splitlines()}

        # "mps" is not an ffmpeg hwaccel name; VideoToolbox is the method
        # ffmpeg offers on macOS.
        for method in ('cuda', 'videotoolbox'):
            if method in available:
                return method
    except Exception as e:
        print(f"检测硬件加速时出错: {e}")

    # No supported hardware acceleration detected
    return None
26 | 
def process_video_subs(video_path):
    """Capture one screenshot per subtitle cue of ``video_path``.

    Reads ``<base>.srt`` next to the video and, for every cue, has ffmpeg
    grab the frame at the midpoint between the cue's start and end. The
    images are written to the directory ``<base>/`` as ``0001.png``,
    ``0002.png``, ... If the SRT file is missing, a message is printed and
    the function returns after creating the (empty) output directory.
    """
    base_name = os.path.splitext(video_path)[0]
    os.makedirs(base_name, exist_ok=True)

    srt_path = f'{base_name}.srt'
    if not os.path.exists(srt_path):
        print(f'找不到字幕文件: {srt_path}')
        return

    subs = pysrt.open(srt_path)

    hw_accel = detect_hardware_acceleration()
    hw_args = ['-hwaccel', hw_accel] if hw_accel else []

    for i, sub in enumerate(tqdm(subs, total=len(subs), desc="处理中"), start=1):
        start = timedelta(hours=sub.start.hours, minutes=sub.start.minutes, seconds=sub.start.seconds, milliseconds=sub.start.milliseconds)
        end = timedelta(hours=sub.end.hours, minutes=sub.end.minutes, seconds=sub.end.seconds, milliseconds=sub.end.milliseconds)
        mid_time = start + (end - start) / 2

        # Whole milliseconds of the midpoint, split into h/m/s/ms
        total_ms = mid_time // timedelta(milliseconds=1)
        total_seconds, millis = divmod(total_ms, 1000)
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        timestamp = '{:02}:{:02}:{:02}.{:03}'.format(hours, minutes, seconds, millis)

        screenshot_filename = os.path.join(base_name, f'{i:04}.png')

        # -y: overwrite screenshots left by an earlier run. Without it ffmpeg
        # stops at an interactive "Overwrite?" prompt and the loop hangs.
        ffmpeg_cmd = ['ffmpeg', *hw_args, '-y', '-loglevel', 'error', '-ss', timestamp,
                      '-i', video_path, '-vframes', '1', '-pix_fmt', 'yuv420p', screenshot_filename]

        result = subprocess.run(ffmpeg_cmd)
        if result.returncode != 0:
            # Keep going: one failed frame should not abort the whole video.
            print(f'截图失败: {screenshot_filename}')
59 | 


--------------------------------------------------------------------------------
/translate_srt.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import pysrt
 3 | import concurrent.futures
 4 | import os
 5 | from tqdm import tqdm
 6 | import sys
 7 | import chardet
 8 | # Your DeepL API key from the environment variable
 9 | api_key = os.getenv('DEEPL_API_KEY')
10 | 
11 | def translate_text(text, target_language='zh'):
12 |     text = text.replace("\h", " ")
13 |     base_url = 'https://api.deepl.com/v2/translate'
14 |     payload = {
15 |         'auth_key': api_key,
16 |         'text': text,
17 |         'target_lang': target_language,
18 |     }
19 |     response = requests.post(base_url, data=payload)
20 |     if response.status_code != 200:
21 |         raise Exception('DeepL request failed with status code {}'.format(response.status_code))
22 |     translated_text = response.json()['translations'][0]['text']
23 |     return translated_text
24 | 
def translate_srt_file(file_path, target_language='zh'):
    """Translate every cue of an SRT file and save the result as ``<base>.zh.srt``.

    The input encoding is guessed with chardet. Cues are translated
    concurrently; a cue whose translation fails is reported and keeps its
    original text. The output is written as UTF-8 next to the input file.
    """
    # Detect the encoding of the .srt file
    with open(file_path, 'rb') as f:
        result = chardet.detect(f.read())

    # chardet reports None when it cannot decide (for example an empty file)
    encoding = result['encoding'] or 'utf-8'
    subs = pysrt.open(file_path, encoding=encoding)

    # Translate each subtitle
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_sub = {executor.submit(translate_text, sub.text, target_language): sub for sub in subs}
        for future in tqdm(concurrent.futures.as_completed(future_to_sub), total=len(subs), desc='Translating subtitles'):
            sub = future_to_sub[future]
            try:
                sub.text = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (sub, exc))

    # Build the output name from the base name. str.replace('.srt', ...) would
    # rewrite every ".srt" inside the name and, for a name without ".srt",
    # would overwrite the input file.
    output_path = os.path.splitext(file_path)[0] + '.zh.srt'
    subs.save(output_path, encoding='utf-8')
46 | 
47 | 
if __name__ == "__main__":
    # Exactly one argument is expected: the .srt file to translate.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(f"Usage: {sys.argv[0]} filename")
        sys.exit(1)

    translate_srt_file(cli_args[0])
54 | 


--------------------------------------------------------------------------------
/translate_txt.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import os
 3 | from concurrent.futures import ThreadPoolExecutor
 4 | from tqdm import tqdm
 5 | import sys
 6 | # Your DeepL API key from the environment variable
 7 | api_key = os.getenv('DEEPL_API_KEY')
 8 | 
 9 | def translate_text(args):
10 |     text, target_language = args
11 |     text = text.replace("\h", " ")
12 |     base_url = 'https://api.deepl.com/v2/translate'
13 |     payload = {
14 |         'auth_key': api_key,
15 |         'text': text,
16 |         'target_lang': target_language,
17 |     }
18 |     response = requests.post(base_url, data=payload)
19 |     if response.status_code != 200:
20 |         print(f'DeepL request failed with status code {response.status_code} for text: {text}')
21 |         return text
22 |     translated_text = response.json()['translations'][0]['text']
23 |     return translated_text
24 | 
def parallel_translation(file_path, target_language='zh', num_workers=5):
    """Translate a UTF-8 text file sentence by sentence, in parallel.

    The text is split on ``.``; each non-blank piece is translated by
    ``translate_text`` and the results are joined with newlines, printed,
    and written to ``translated_<name>`` in the same directory as the input.

    :param file_path: Path of the text file to translate.
    :param target_language: DeepL target language code.
    :param num_workers: Number of concurrent translation threads.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Skip blank pieces (such as the tail after the final "."): they carry
    # nothing to translate and would only cost an API request.
    sentences = [sentence for sentence in content.split('.') if sentence.strip()]
    jobs = [(sentence, target_language) for sentence in sentences]
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        translated_sentences = list(tqdm(executor.map(translate_text, jobs), total=len(jobs)))

    translated_text = '\n'.join(translated_sentences)

    # Prefix only the file name; prefixing the whole path would produce
    # "translated_dir/file.txt" for an input of "dir/file.txt".
    directory, name = os.path.split(file_path)
    output_path = os.path.join(directory, 'translated_' + name)
    with open(output_path, 'w', encoding='utf-8') as file:
        print(translated_text)
        file.write(translated_text)
37 | 
if __name__ == "__main__":
    # Exactly one argument is expected: the text file to translate.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(f"Usage: {sys.argv[0]} filename")
        sys.exit(1)

    parallel_translation(cli_args[0])
44 | 


--------------------------------------------------------------------------------
/video2slide.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | from download_video import download_video
 4 | from download_video import generate_srt
 5 | from download_video import old_generate_srt
 6 | from transcript import process_video_subs
 7 | from video_to_images import video_to_images
 8 | from convert_png_to_pdf import convert_png_to_pdf
 9 | from silmilar import remove_duplicate_images
os.environ['SDL_AUDIODRIVER'] = 'dummy'

if __name__ == "__main__":
    # Exactly one argument is expected: the video file name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(f"Usage: {sys.argv[0]} [file name]")
        sys.exit(1)

    file_name = cli_args[0]
    # The screenshot directory shares the video's name without its extension
    base_name = os.path.splitext(file_name)[0]

    # Capture one screenshot per subtitle cue (video already carries subtitles)
    process_video_subs(file_name)

    # Drop screenshots that are duplicates of an earlier one
    remove_duplicate_images(base_name)

    # Bundle the remaining screenshots into "<base>_slide.pdf"
    convert_png_to_pdf(base_name, base_name, "_slide")
29 | 


--------------------------------------------------------------------------------
/video_to_images.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import subprocess
  3 | import json
  4 | import cv2
  5 | import numpy as np
  6 | from PIL import Image, ImageDraw, ImageFont
  7 | from concurrent.futures import ThreadPoolExecutor
  8 | from tqdm import tqdm
  9 | import time  # 添加 time 模組
 10 | 
 11 | def get_video_dimensions(video_path):
 12 |     cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0",
 13 |            "-show_entries", "stream=width,height,sample_aspect_ratio", "-of", "json", video_path]
 14 |     info = json.loads(subprocess.check_output(cmd).decode())
 15 |     stream = info['streams'][0]
 16 |     w, h = stream['width'], stream['height']
 17 |     sar = stream.get('sample_aspect_ratio', '1:1').split(':')
 18 |     return int(w * float(sar[0])/float(sar[1])), h
 19 | 
 20 | def parse_subtitle(entry):
 21 |     lines = entry.strip().split('\n')
 22 |     if len(lines) < 3: return None, None, None
 23 |     start = sum(float(x)*60**i for i,x in enumerate(reversed(lines[1].split(' --> ')[0].replace(',','.').split(':'))))
 24 |     end = sum(float(x)*60**i for i,x in enumerate(reversed(lines[1].split(' --> ')[1].replace(',','.').split(':'))))
 25 |     return start, end, ' '.join(lines[2:])
 26 | 
 27 | def init_process(font_path, font_size):
 28 |     global font
 29 |     font = ImageFont.truetype(font_path, font_size)
 30 | 
 31 | def process_task(args):
 32 |     i, start, text, out_dir, width, height, y = args
 33 |     cap = cv2.VideoCapture(video_path_global)
 34 |     if not cap.isOpened():
 35 |         print(f"Error: Could not open video file for frame {i}")
 36 |         return i
 37 |         
 38 |     cap.set(cv2.CAP_PROP_POS_MSEC, start*1000)
 39 |     ret, frame = cap.read()
 40 |     
 41 |     if not ret or frame is None:
 42 |         print(f"Error: Could not read frame {i} at time {start} seconds")
 43 |         cap.release()
 44 |         return i
 45 |         
 46 |     try:
 47 |         img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
 48 |         draw = ImageDraw.Draw(img)
 49 |         
 50 |         # 獲取字體度量
 51 |         ascent, descent = font.getmetrics()
 52 |         text_vertical_span = ascent + descent
 53 |         
 54 |         # 計算文字寬度
 55 |         text_width = draw.textlength(text, font=font)
 56 |         
 57 |         # 計算背景矩形參數
 58 |         padding = 5
 59 |         rect_left = (width - text_width) // 2 - padding
 60 |         rect_top = y - ascent - padding
 61 |         rect_right = rect_left + text_width + padding * 2
 62 |         rect_bottom = rect_top + text_vertical_span + padding * 2
 63 |         
 64 |         # 繪製背景
 65 |         draw.rectangle([(rect_left, rect_top), (rect_right, rect_bottom)], fill='black')
 66 |         
 67 |         # 計算文字位置（水平居中，垂直居中）
 68 |         text_x = (width - text_width) // 2
 69 |         text_y = rect_top + padding
 70 |         
 71 |         # 繪製文字
 72 |         draw.text((text_x, text_y), text, fill=(255,255,0), font=font)
 73 |         
 74 |         img.save(f"{out_dir}/{i:04d}.jpg", quality=95)
 75 |     except Exception as e:
 76 |         print(f"Error processing frame {i}: {str(e)}")
 77 |     finally:
 78 |         cap.release()
 79 |     return i
 80 | 
 81 | def video_to_images(video_path):
 82 |     start_time = time.time()  # 開始計時
 83 |     
 84 |     global video_path_global
 85 |     video_path_global = video_path
 86 |     base = os.path.splitext(video_path)[0]
 87 |     os.makedirs(base, exist_ok=True)
 88 |     
 89 |     with open(f"{base}.zh.srt", 'r', encoding='utf-8') as f:
 90 |         subs = [parse_subtitle(entry) for entry in f.read().split('\n\n') if entry.strip()]
 91 |     
 92 |     w, h = get_video_dimensions(video_path)
 93 |     scale = max(w, h) / 1920
 94 |     font_size = int(50 * scale)
 95 |     y_pos = h - 100 if w > 640 else h - 30
 96 |     
 97 |     # Try different font paths
 98 |     possible_font_paths = [
 99 |         "Noto-Sans-CJK-SC.ttf",
100 |         "/System/Library/Fonts/PingFang.ttc",  # macOS default Chinese font
101 |         "/System/Library/Fonts/Arial Unicode.ttf",  # Another common option
102 |         "/usr/share/fonts/truetype/noto/NotoSansCJK-Regular.ttc"  # Linux path
103 |     ]
104 |     
105 |     font_path = None
106 |     for path in possible_font_paths:
107 |         if os.path.exists(path):
108 |             font_path = path
109 |             break
110 |     
111 |     if font_path is None:
112 |         raise FileNotFoundError("Could not find a suitable font. Please install Noto Sans CJK or specify a valid font path.")
113 |     
114 |     tasks = [(i, s, t, base, w, h, y_pos) for i, (s, e, t) in enumerate(subs) if t]
115 |     
116 |     with ThreadPoolExecutor(initializer=init_process, initargs=(font_path, font_size),
117 |                            max_workers=os.cpu_count()*2) as executor:
118 |         list(tqdm(executor.map(process_task, tasks), total=len(tasks)))
119 |     
120 |     end_time = time.time()  # 結束計時
121 |     duration = end_time - start_time
122 |     print(f"\n處理完成！")
123 |     print(f"總共處理了 {len(tasks)} 張圖片")
124 |     print(f"總耗時: {duration:.2f} 秒")
125 |     print(f"平均每張圖片處理時間: {duration/len(tasks):.2f} 秒")


--------------------------------------------------------------------------------
/video_to_srt.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import math
  4 | import subprocess
  5 | import json
  6 | import tempfile
  7 | from datetime import datetime, timedelta
  8 | from pathlib import Path
  9 | import logging
 10 | from groq import Groq
 11 | 
# Configure logging: INFO level, with a module-level logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
 15 | 
 16 | def format_timestamp(seconds):
 17 |     """将秒数转换为 SRT 时间格式"""
 18 |     hours = int(seconds // 3600)
 19 |     minutes = int((seconds % 3600) // 60)
 20 |     secs = int(seconds % 60)
 21 |     msecs = int((seconds - int(seconds)) * 1000)
 22 |     return f"{hours:02d}:{minutes:02d}:{secs:02d},{msecs:03d}"
 23 | 
 24 | def get_video_duration(filename):
 25 |     """使用 ffprobe 获取视频时长（秒）"""
 26 |     cmd = [
 27 |         'ffprobe',
 28 |         '-v', 'quiet',
 29 |         '-print_format', 'json',
 30 |         '-show_format',
 31 |         '-show_streams',
 32 |         str(filename)
 33 |     ]
 34 |     try:
 35 |         result = subprocess.run(cmd, capture_output=True, text=True)
 36 |         data = json.loads(result.stdout)
 37 |         return float(data['format']['duration'])
 38 |     except Exception as e:
 39 |         logger.error(f"获取视频时长失败: {str(e)}")
 40 |         return None
 41 | 
 42 | def split_video(input_file, segment_length=600):
 43 |     """将视频分割成指定长度的片段"""
 44 |     duration = get_video_duration(input_file)
 45 |     if not duration:
 46 |         return None
 47 | 
 48 |     segments = []
 49 |     temp_dir = tempfile.mkdtemp()
 50 |     
 51 |     for i in range(0, int(duration), segment_length):
 52 |         output_file = Path(temp_dir) / f"segment_{i:04d}.m4a"
 53 |         cmd = [
 54 |             'ffmpeg',
 55 |             '-i', str(input_file),
 56 |             '-ss', str(i),
 57 |             '-t', str(segment_length),
 58 |             '-vn',
 59 |             '-acodec', 'copy',
 60 |             str(output_file),
 61 |             '-y'
 62 |         ]
 63 |         try:
 64 |             subprocess.run(cmd, check=True, capture_output=True)
 65 |             segments.append({
 66 |                 'file': output_file,
 67 |                 'start_time': i
 68 |             })
 69 |         except subprocess.CalledProcessError as e:
 70 |             logger.error(f"分割视频失败: {str(e)}")
 71 |             return None
 72 | 
 73 |     return segments
 74 | 
 75 | def transcribe_segment(client, segment):
 76 |     """转录单个视频片段"""
 77 |     try:
 78 |         logger.info(f"开始转录片段: {segment['file']}")
 79 |         with open(segment['file'], "rb") as file:
 80 |             transcription = client.audio.transcriptions.create(
 81 |                 file=(str(segment['file']), file.read()),
 82 |                 model="whisper-large-v3",
 83 |                 response_format="verbose_json",
 84 |                 prompt="这是中文字幕"
 85 |             )
 86 |         logger.info(f"成功转录片段: {segment['file']}")
 87 |         return transcription.segments
 88 |     except Exception as e:
 89 |         logger.error(f"转录片段失败 {segment['file']}: {str(e)}")
 90 |         return None
 91 | 
 92 | def create_srt(segments, start_time_offset=0):
 93 |     """将转录结果转换为 SRT 格式"""
 94 |     srt_content = []
 95 |     counter = 1
 96 |     
 97 |     for segment in segments:
 98 |         start_time = start_time_offset + float(segment['start'])
 99 |         end_time = start_time_offset + float(segment['end'])
100 |         text = segment['text'].strip()
101 |         
102 |         srt_content.append(f"{counter}\n{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n{text}\n")
103 |         counter += 1
104 |         
105 |     return "\n".join(srt_content)
106 | 
def create_txt(segments):
    """Return the stripped text of every segment, one segment per line."""
    lines = []
    for segment in segments:
        lines.append(segment['text'].strip())
    return "\n".join(lines)
110 | 
def check_environment():
    """Return the GROQ_API_KEY environment variable.

    When the variable is unset or empty, setup instructions are printed and
    the process exits with status 1.
    """
    api_key = os.getenv('GROQ_API_KEY')
    if api_key:
        return api_key

    instructions = (
        "错误：未找到 GROQ_API_KEY 环境变量",
        "请确保您已经设置了环境变量，可以通过以下方式之一设置：",
        "1. 在终端执行: export GROQ_API_KEY='您的API密钥'",
        "2. 在 .zshrc 或 .bashrc 中添加后，执行 source ~/.zshrc 或 source ~/.bashrc",
    )
    for line in instructions:
        print(line)
    sys.exit(1)
120 | 
def _remove_segment_files(segments):
    """Best-effort removal of the temporary segment files and their directory."""
    for segment in segments or []:
        segment_path = Path(segment['file'])
        try:
            os.unlink(segment_path)
        except OSError:
            pass  # already gone
        try:
            # rmdir only succeeds once the directory is empty
            os.rmdir(segment_path.parent)
        except OSError:
            pass


def main():
    """Transcribe a video into ``<name>.zh.srt`` and ``<name>.txt``.

    Command line: ``video_to_srt.py <input video>``. The audio is split into
    segments, each segment is transcribed, and the results are merged into
    one SRT file with continuous numbering plus a plain-text file. Exits
    with status 1 on any failure; temporary segment files are removed either
    way.
    """
    if len(sys.argv) != 2:
        print("用法: python video_to_srt.py <输入视频文件>")
        sys.exit(1)

    api_key = check_environment()
    input_file = Path(sys.argv[1])
    if not input_file.exists():
        logger.error(f"Input file not found: {input_file}")
        sys.exit(1)

    segments = None
    try:
        client = Groq(api_key=api_key)
        logger.info("Groq client initialized successfully")

        # Split the video
        logger.info("开始分割视频...")
        segments = split_video(input_file)
        if not segments:
            raise Exception("视频分割失败")

        # Transcribe each segment
        logger.info("开始转录片段...")
        all_transcriptions = []  # raw results, used for the text file
        timed_entries = []       # the same cues shifted to absolute time

        for segment in segments:
            transcription = transcribe_segment(client, segment)
            # None means failure; an empty list is just a silent segment.
            if transcription is None:
                raise Exception(f"转录失败: {segment['file']}")

            offset = segment['start_time']
            timed_entries.extend(
                {
                    'start': float(item['start']) + offset,
                    'end': float(item['end']) + offset,
                    'text': item['text'],
                }
                for item in transcription
            )
            all_transcriptions.extend(transcription)

        # Render all cues in one call so the numbering runs 1..N across the
        # whole video instead of restarting at 1 for every segment.
        final_srt = create_srt(timed_entries) + "\n"

        # Save the SRT file (Chinese)
        srt_file = input_file.with_suffix('.zh.srt')
        with open(srt_file, 'w', encoding='utf-8') as f:
            f.write(final_srt)
        logger.info(f"字幕文件已保存为: {srt_file}")

        # Save the plain-text file
        txt_file = input_file.with_suffix('.txt')
        with open(txt_file, 'w', encoding='utf-8') as f:
            f.write(create_txt(all_transcriptions))
        logger.info(f"文本文件已保存为: {txt_file}")

    except Exception as e:
        logger.error(f"处理失败: {str(e)}")
        sys.exit(1)
    finally:
        # Runs on success and on failure, including the sys.exit above.
        _remove_segment_files(segments)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/you_dt.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | from download_video import download_video
 4 | from download_video import generate_srt
 5 | from translate_srt import translate_srt_file
 6 | from video_to_images import video_to_images
 7 | from convert_png_to_pdf import convert_png_to_pdf
 8 | os.environ['SDL_AUDIODRIVER'] = 'dummy'
 9 | 
10 | 
11 | if __name__ == "__main__":
12 |     if len(sys.argv) != 2:
13 |         print(f"Usage: {sys.argv[0]} [youtube url]")
14 |         sys.exit(1)
15 | 
16 |     # 擷取影片和字幕檔案
17 |     file_name = download_video(sys.argv[1])
18 |     generate_srt(file_name,sys.argv[1])
19 |     # xxx.en.srt
20 |     en_srt_name = file_name.replace("mp4", "srt")
21 |     translate_srt_file(en_srt_name)
22 |     # 取得影片檔案名稱和基本名稱
23 | 
24 |     video_file_name = file_name
25 |     # xxx.en.srt => xxx.mp4
26 | 
27 |     base_name = os.path.splitext(video_file_name)[0]
28 |     # xxx.mp4 => xxx
29 | 
30 |     # 將影片轉換為圖片
31 |     video_to_images(video_file_name)
32 | 
33 |     # 轉換圖片為PDF
34 |     convert_png_to_pdf(base_name, base_name)
35 | 


--------------------------------------------------------------------------------
/youtube2slide.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | from download_video import download_video
 4 | from download_video import generate_srt
 5 | from download_video import old_generate_srt
 6 | from transcript import process_video_subs
 7 | from video_to_images import video_to_images
 8 | from convert_png_to_pdf import convert_png_to_pdf
 9 | from silmilar import remove_duplicate_images
# NOTE(review): presumably selects SDL's dummy audio driver so the pipeline
# can run without an audio device — confirm against the video libraries used.
os.environ['SDL_AUDIODRIVER'] = 'dummy'


if __name__ == "__main__":
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print(f"Usage: {sys.argv[0]} [youtube url]")
        sys.exit(1)

    # Fetch the video, then generate its subtitle file.
    video_path = download_video(sys.argv[1])
    generate_srt(video_path, sys.argv[1])

    # xxx.mp4 => xxx
    stem, _extension = os.path.splitext(video_path)

    # Take screenshots driven by the subtitles, then drop duplicate images.
    process_video_subs(video_path)
    remove_duplicate_images(stem)

    # Bundle the remaining screenshots into the "_slide" PDF.
    convert_png_to_pdf(stem, stem, "_slide")
36 | 


--------------------------------------------------------------------------------