├── Auto_Chap
    ├── Auto_Chap.py
    ├── Changelog.md
    └── requirements.txt
├── Chapter_Snapper
    └── Chapter_Snapper.py
├── Converter
    └── Converter.py
├── Hidive_Splitter
    └── Hidive_Splitter.py
├── Overlap_Blue
    └── Overlap_Blue.py
├── P-Proper_Stutter
    └── P-Proper_Stutter.py
├── README.md
├── Regex_Stuff
    └── Regex_Stuff.py
├── Resampler
    └── Resampler.py
├── Sign_DeOverlap
    └── Sign_DeOverlap.py
└── Style_Cleanup
    └── Style_Cleanup.py


/Auto_Chap/Auto_Chap.py:
--------------------------------------------------------------------------------
  1 | # Auto Chap V4.2
  2 | import sys
  3 | import json
  4 | import os
  5 | import urllib
  6 | import time
  7 | import warnings
  8 | import argparse
  9 | import shutil
 10 | import math
 11 | from pathlib import Path
 12 | import requests
 13 | import subprocess
 14 | from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
 15 | import librosa
 16 | import audioread.ffdec
 17 | import numpy as np
 18 | from scipy import signal
 19 | import matplotlib.pyplot as plt
 20 | 
 21 | ### Chapter Names
 22 | PRE_OP = "Prologue"
 23 | OPENING = "Opening"
 24 | EPISODE = "Episode"
 25 | ENDING = "Ending"
 26 | POST_ED = "Epilogue"
 27 | 
 28 | # Ignore librosa warnings about audioread. Try downgrading to librosa < 1.0 if it fully breaks
 29 | warnings.filterwarnings("ignore", category=FutureWarning)
 30 | warnings.filterwarnings("ignore", category=UserWarning)
 31 | 
 32 | def parse_args():
 33 |     parser = argparse.ArgumentParser(description="Automatic anime chapter generator using AnimeThemes.")
 34 |     parser.add_argument(
 35 |         "--input", "-i", type=Path, required=True,
 36 |         help="Video/Audio file.",
 37 |     )
 38 | 
 39 |     parser.add_argument(
 40 |         "--output", "-o", type=Path,
 41 |         help="Output chapter file. Defaults to where the episode is.",
 42 |     )
 43 | 
 44 |     parser.add_argument(
 45 |         "--search-name", "-s", type=str,
 46 |         help="Search to pass to animethemes.moe Example: Spy Classroom Season 2. To only use themes that are already downloaded, don't add this argument.",
 47 |     )
 48 | 
 49 |     parser.add_argument(
 50 |         "--year", type=int,
 51 |         help="Release year to help filter the search. Put the negative number to allow that year or later.",
 52 |     )
 53 | 
 54 |     parser.add_argument(
 55 |         "--snap", type=int, nargs='?', const=1000, default=None,
 56 |         help="Millisecond window to snap to nearest keyframe for frame-perfect chapters. Efficiently generates necessary keyframes from video. Defaults to 1000ms if no value added. Values higher than about 1000 currently crash.",
 57 |     )
 58 | 
 59 |     parser.add_argument(
 60 |         "--episode-snap", type=float, default=4,
 61 |         help="Window in seconds to snap chapters to the start or end of the episode. This gets applied at the very end. Defaults to 4."
 62 |     )
 63 | 
 64 |     parser.add_argument(
 65 |         "--score", type=int, default=2000,
 66 |         help="Score required for a theme to be accepted as a match. Increase it to reduce false positives, decrease it to be more lenient. Score is y-axis in charts divided by downsample factor. Defaults to 2000.",
 67 |     )
 68 | 
 69 |     parser.add_argument(
 70 |         "--theme-portion", type=float, default=0.9,
 71 |         help="Portion of a theme required in the episode to be a match. Keep below 1 so that it can still match themes that get slightly cut off. Defaults to 0.9."
 72 |     )
 73 | 
 74 |     parser.add_argument(
 75 |         "--downsample", type=int, default=32,
 76 |         help="Factor to downsample audio when matching, higher means speedier potentially with lower accuracy. Defaults to 32.",
 77 |     )
 78 | 
 79 |     parser.add_argument(
 80 |         "--parallel-dl", type=int, default=10,
 81 |         help="How many themes to download in parallel. Defaults to 10.",
 82 |     )
 83 | 
 84 |     parser.add_argument(
 85 |         "--work-path", "-w", type=Path,
 86 |         help="Place to create a .themes folder for storing persistent information per series. Defaults to where the episode is.",
 87 |     )
 88 | 
 89 |     parser.add_argument(
 90 |         "--delete-themes", "-d", default=False, action="store_true",
 91 |         help="Delete the themes and charts after running.",
 92 |     )
 93 | 
 94 |     parser.add_argument(
 95 |         "--charts", "-c", default=False, action="store_true",
 96 |         help="Make charts of where themes are matched in the episode. They can almost double processing time in some cases though.",
 97 |     )
 98 | 
 99 |     args = parser.parse_args()
100 |     args.no_download = False
101 | 
102 |     if args.search_name is None:
103 |         args.no_download = True
104 | 
105 |     if args.work_path is None:
106 |         args.work_path = Path(os.path.dirname(args.input))
107 | 
108 |     if args.output is None:
109 |         args.output = args.input.with_name(args.input.stem + ".chapters.txt")
110 | 
111 |     if args.snap is not None:
112 |         if args.snap > 1000:
113 |             print("Snap values higher than about 1000 currently crash SCXvid. Please lower it.", file=sys.stderr)
114 |             sys.exit(1)
115 | 
116 |     if args.theme_portion <= 0:
117 |         print("Theme portion must be more than 0.", file=sys.stderr)
118 |         sys.exit(1)
119 |     elif args.theme_portion > 1:
120 |         print("Theme portion must be less than or equal to 1.", file=sys.stderr)
121 |         sys.exit(1)
122 | 
123 |     args.episode_audio_path = None
124 | 
125 |     return args
126 | 
127 | def print_seperator():
128 |     print("------------------------------")
129 | 
130 | def make_folders(work_path):
131 |     subdirectory_path = work_path / ".themes" / "charts"
132 |     shutil.rmtree(subdirectory_path, ignore_errors=True)
133 |     subdirectory_path.mkdir(parents=True)
134 | 
135 | def get_series_json(args):
136 |     api_search_call = f"https://api.animethemes.moe/search?fields[search]=anime&q={urllib.parse.quote(args.search_name)}"
137 |     if args.year:
138 |         if args.year < 0:
139 |             api_search_call += f"&filter[year-gte]={abs(args.year)}"
140 |         else:
141 |             api_search_call += f"&filter[year]={args.year}"
142 |     global_search = requests.get(api_search_call).json()
143 |     series_slug = global_search["search"]["anime"][0]["slug"]
144 |     series_json = requests.get(f"https://api.animethemes.moe/anime/{series_slug}?include=animethemes.animethemeentries.videos.audio&fields[audio]=filename,updated_at,link").json()
145 |     return series_json["anime"]
146 | 
147 | def download_theme(t_path, theme_name, url):
148 |     response = requests.get(url)
149 |     if response.status_code == 200:
150 |         download_path = f'{t_path}/{theme_name}'
151 |         download_path += ".ogg"
152 |         with open(download_path, "wb") as file:
153 |             file.write(response.content)
154 |         print(f"{theme_name}: Downloaded     ", file=sys.stderr)
155 |     else:
156 |         print(f"Failed to download {theme_name}. Status code:", response.status_code, file=sys.stderr)
157 | 
158 | def download_themes(t_path, args, series_json):
159 |     try:
160 |         with open(os.path.join(t_path, "data.json")) as data:
161 |             stored_data = json.load(data)
162 |     except Exception:
163 |         stored_data = {}
164 | 
165 |     # Reset themes if series different from last time
166 |     if stored_data.get("series_name") != series_json["name"]:
167 |         stored_data = {"series_name": series_json["name"]}
168 |         files = os.listdir(t_path)
169 |         for file in files:
170 |             if file.endswith(".ogg"):
171 |                 file_path = os.path.join(t_path, file)
172 |                 os.remove(file_path)
173 | 
174 |     need_download = []
175 | 
176 |     for theme in series_json["animethemes"]:
177 |         audio_version = 1
178 |         audio_links = []
179 |         cur_theme = theme["slug"] # OP1 or ED3, etc.
180 |         if not cur_theme[-1].isdigit():
181 |             cur_theme = cur_theme + "1"
182 |         for version in theme["animethemeentries"]: # Different video versions of theme
183 |             full_cur_theme = cur_theme
184 |             if audio_version > 1:
185 |                 full_cur_theme += f"v{audio_version}"
186 |             for video in version["videos"]:
187 |                 if video["overlap"] != "None": # No overs or transitions
188 |                     continue
189 |                 try: # Look to see if it is in data.json or needs an update
190 |                     if video["audio"]["updated_at"] == stored_data[full_cur_theme]["updated_at"] and \
191 |                         video["audio"]["link"] not in audio_links and \
192 |                         os.path.isfile(os.path.join(t_path, full_cur_theme + ".ogg")):
193 |                             audio_links.append(video["audio"]["link"])
194 |                             print(f"{full_cur_theme}: Found in directory", file=sys.stderr)
195 |                             audio_version += 1
196 |                             break
197 |                 except Exception:
198 |                     pass
199 |                 # Add to data.json
200 |                 stored_data[full_cur_theme] = {}
201 |                 stored_data[full_cur_theme]["updated_at"] = video["audio"]["updated_at"]
202 |                 stored_data[full_cur_theme]["animethemes_filename"] = video["audio"]["filename"]
203 |                 if video["audio"]["link"] not in audio_links:
204 |                     need_download.append((full_cur_theme, video["audio"]["link"]))
205 |                     audio_links.append(video["audio"]["link"])
206 |                     audio_version += 1
207 | 
208 |     if len(need_download) > 0:
209 |         print("Downloading themes...")
210 | 
211 |     with ThreadPoolExecutor(max_workers=args.parallel_dl) as executor:
212 |         future_to_url = {executor.submit(download_theme, t_path, theme, url): (theme, url) for (theme, url) in need_download}
213 |         for future in as_completed(future_to_url):
214 |             url = future_to_url[future]
215 |             try:
216 |                 data = future.result()
217 |             except Exception as exc:
218 |                 print(f"{url} generated an exception: {exc}", file=sys.stderr)
219 | 
220 |     with open(os.path.join(t_path, "data.json"), "w") as outfile:
221 |         json.dump(stored_data, outfile, indent=4)
222 | 
223 | def generate_chart(theme_name, c, t_path, matched=True):
224 |     try:
225 |         fig, ax = plt.subplots()
226 |         ax.plot(c)
227 |     except Exception as exc:
228 |         print(f"{theme_name}: Could not plot figure - {exc}", file=sys.stderr)
229 |         return
230 | 
231 |     try:
232 |         if matched:
233 |             fig.savefig(os.path.join(f"{t_path}", "charts", f"{theme_name}_matched.png"))
234 |         else:
235 |             fig.savefig(os.path.join(f"{t_path}", "charts", f"{theme_name}.png"))
236 |     except Exception as exc:
237 |         print(f"{theme_name}: Could not save figure - {exc}", file=sys.stderr)
238 |         return
239 | 
240 |     print(f"{theme_name}: Chart generated")
241 | 
242 | def find_offset(y_episode, sr_episode, theme_file, t_path, args):
243 |     theme_name = os.path.splitext(theme_file.name)[0]
244 | 
245 |     try:
246 |         aro = audioread.ffdec.FFmpegAudioFile(str(theme_file))
247 |         y_theme, _ = librosa.load(aro, sr=sr_episode)
248 |     except Exception as exc:
249 |         print(f"{theme_name}: Could not load theme file - {exc}", file=sys.stderr)
250 |         sys.exit(1)
251 | 
252 |     y_episode= y_episode[::args.downsample]
253 |     y_theme = y_theme[::args.downsample]
254 | 
255 |     # 5 secs silence prepended to fix matches at the beginning of episode
256 |     silence_length = int(5 * sr_episode / args.downsample)
257 |     y_episode_adjust = np.empty(silence_length + len(y_episode), dtype=y_episode.dtype)
258 |     y_episode_adjust[:silence_length] = 0
259 |     y_episode_adjust[silence_length:] = y_episode
260 | 
261 |     duration = librosa.get_duration(path=str(theme_file))
262 |     y_theme_first_portion = y_theme[:int(sr_episode * ((duration + 5) * args.theme_portion) / args.downsample)]
263 | 
264 |     try:
265 |         c = signal.correlate(y_episode_adjust, y_theme_first_portion, mode="valid", method="auto")
266 |     except Exception as exc:
267 |         print(f"{theme_name}: Error in correlate - {exc}", file=sys.stderr)
268 |         return None, None
269 | 
270 |     required_score = args.score / args.downsample
271 | 
272 |     match_idx = np.argmax(c)
273 |     score = np.max(c)
274 |     offset = max(round((match_idx - silence_length) / (sr_episode / args.downsample), 2), 0)
275 | 
276 |     if score > required_score:
277 |         print(f"{theme_name}: Matched from {get_timestamp(offset)} -> {get_timestamp(offset + duration)}", file=sys.stderr)
278 |         if args.charts:
279 |             with ProcessPoolExecutor() as executor:
280 |                 executor.submit(generate_chart, theme_name, c, t_path, True)
281 |         return offset, (offset + duration)
282 | 
283 |     else:
284 |         print(f"{theme_name}: Not matched", file=sys.stderr)
285 |         if args.charts:
286 |             with ProcessPoolExecutor() as executor:
287 |                 executor.submit(generate_chart, theme_name, c, t_path, False)
288 |         return None, None
289 | 
290 | def get_timestamp(timesec):
291 |     timestamp = time.strftime(f"%H:%M:%S.{round(timesec%1*1000):03}", time.gmtime(timesec))
292 |     return timestamp
293 | 
294 | def chapter_validator(offset_list, file_duration):
295 |     if len(offset_list) == 0:
296 |         print_seperator()
297 |         print("No matches", file=sys.stderr)
298 |         return False
299 |     elif len(offset_list) == 2:
300 |         return True
301 |     elif len(offset_list) == 4:
302 |         if offset_list[0] > (file_duration / 2) and offset_list[2] > (file_duration / 2):
303 |             print_seperator()
304 |             print("Chapters not valid. They both start in the second half", file=sys.stderr)
305 |             return False
306 |         elif offset_list[0] < (file_duration / 2) and offset_list[2] < (file_duration / 2):
307 |             print_seperator()
308 |             print("Chapters not valid. They both start in the first half", file=sys.stderr)
309 |             return False
310 |         else:
311 |             return True
312 |     else:
313 |         print_seperator()
314 |         print("Chapters not valid. Invalid number of offsets", file=sys.stderr)
315 |         return False
316 | 
317 | def generate_chapters(offset_list, file_duration, args):
318 |     outfile = open(args.output, "w", encoding="utf-8")
319 |     snap_beginning = False
320 |     snap_end = False
321 |     ed_only = False
322 | 
323 |     if offset_list[0] < args.episode_snap:
324 |         snap_beginning = True
325 | 
326 |     if offset_list[-1] > (file_duration - args.episode_snap):
327 |         snap_end = True
328 | 
329 |     if offset_list[0] < (file_duration / 2):
330 |         pass
331 |     else:
332 |         ed_only = True
333 | 
334 |     outfile.write("CHAPTER01=00:00:00.000\n")
335 |     if snap_beginning:
336 |         outfile.write(f"CHAPTER01NAME={OPENING}\n")
337 |         outfile.write(f"CHAPTER02={get_timestamp(offset_list[1])}\n")
338 |         outfile.write(f"CHAPTER02NAME={EPISODE}\n")
339 |         if len(offset_list) == 4:
340 |             outfile.write(f"CHAPTER03={get_timestamp(offset_list[2])}\n")
341 |             outfile.write(f"CHAPTER03NAME={ENDING}\n")
342 |             if not snap_end:
343 |                 outfile.write(f"CHAPTER04={get_timestamp(offset_list[3])}\n")
344 |                 outfile.write(f"CHAPTER04NAME={POST_ED}\n")
345 |     elif ed_only:
346 |         outfile.write(f"CHAPTER01NAME={EPISODE}\n")
347 |         outfile.write(f"CHAPTER02={get_timestamp(offset_list[0])}\n")
348 |         outfile.write(f"CHAPTER02NAME={ENDING}\n")
349 |         if not snap_end:
350 |             outfile.write(f"CHAPTER03={get_timestamp(offset_list[1])}\n")
351 |             outfile.write(f"CHAPTER03NAME={POST_ED}\n")
352 |     else:
353 |         outfile.write(f"CHAPTER01NAME={PRE_OP}\n")
354 |         outfile.write(f"CHAPTER02={get_timestamp(offset_list[0])}\n")
355 |         outfile.write(f"CHAPTER02NAME={OPENING}\n")
356 |         outfile.write(f"CHAPTER03={get_timestamp(offset_list[1])}\n")
357 |         outfile.write(f"CHAPTER03NAME={EPISODE}\n")
358 |         if len(offset_list) == 4:
359 |             outfile.write(f"CHAPTER04={get_timestamp(offset_list[2])}\n")
360 |             outfile.write(f"CHAPTER04NAME={ENDING}\n")
361 |             if not snap_end:
362 |                 outfile.write(f"CHAPTER05={get_timestamp(offset_list[3])}\n")
363 |                 outfile.write(f"CHAPTER05NAME={POST_ED}\n")
364 | 
365 |     outfile.close()
366 | 
367 | def validate_themes(args, t_path):
368 |     if args.no_download:
369 |         valid = False
370 |         if os.path.isdir(t_path):
371 |             for theme_file in os.scandir(t_path):
372 |                 if ".ogg" in str(theme_file):
373 |                     valid = True
374 |         if not valid:
375 |             print("No valid themes. Specify a search-name to download themes.", file=sys.stderr)
376 |             sys.exit(1)
377 | 
378 | def try_download(args, t_path):
379 |     if not args.no_download:
380 |         print("Searching AnimeThemes...", end="", flush=True)
381 |         try:
382 |             series_json = get_series_json(args)
383 |             print(f'\rAnimeThemes matched series: {series_json["name"]}', file=sys.stderr)
384 |             print_seperator()
385 |             download_themes(t_path, args, series_json)
386 |             print_seperator()
387 |         except Exception as exc:
388 |             print(f"\rCouldn't access api or download: {exc}", file=sys.stderr)
389 | 
390 | def extract_episode_audio(args):
391 |     file_path = args.input
392 | 
393 |     extract_path = f"{str(file_path)}.autochap.wav"
394 |     try:
395 |         os.remove(extract_path)
396 |     except OSError:
397 |         pass
398 | 
399 |     if str(file_path).endswith(".mkv"):
400 |         print("Extracting episode audio...", end="", flush=True)
401 |         # Extract as a temp wav file
402 |         output = subprocess.run(["ffmpeg", "-hide_banner", "-loglevel", "error", "-n", "-i",
403 |                                  file_path, "-map", "0:a:0", extract_path], capture_output=True)
404 |         if len(output.stderr) > 0:
405 |             print("\rextraction error          ", file=sys.stderr)
406 |             print(output.stderr.decode())
407 |             sys.exit(1)
408 | 
409 |         args.episode_audio_path = extract_path
410 |         print("\rExtracted episode audio      ")
411 |     else:
412 |         args.episode_audio_path = str(args.input)
413 | 
414 | def process_themes(t_path, args, theme_files, theme_type, y_episode, sr_episode):
415 |         matched_flag = False
416 |         local_offset_list = []
417 |         for (theme_name, theme_path) in theme_files:
418 |             if theme_type in theme_name and matched_flag:
419 |                 print(f"{theme_name}: Skipping because already matched an {theme_type}", file=sys.stderr)
420 |                 continue
421 |             offset1, offset2 = find_offset(y_episode, sr_episode, theme_path, t_path, args)
422 |             if offset1 is not None:
423 |                 matched_flag = True
424 |                 local_offset_list.append(offset1)
425 |                 local_offset_list.append(offset2)
426 | 
427 |         return local_offset_list
428 | 
429 | def match_themes(args, t_path):
430 |     op_files = []
431 |     ed_files = []
432 |     for theme_file in os.scandir(t_path):
433 |         if ".ogg" in str(theme_file):
434 |             theme_name = os.path.splitext(Path(theme_file.path).name)[0]
435 |             theme_path = Path(theme_file.path)
436 |             if "OP" in theme_name:
437 |                 op_files.append((theme_name, theme_path))
438 |             elif "ED" in theme_name:
439 |                 ed_files.append((theme_name, theme_path))
440 | 
441 |     offset_list = []
442 |     print("Matching themes...")
443 | 
444 |     try:
445 |         y_episode, sr_episode = librosa.load(str(args.episode_audio_path), sr=None)
446 |     except Exception as exc:
447 |         print(f"Could not load input file - {str(args.episode_audio_path)}: {exc}", file=sys.stderr)
448 |         sys.exit(1)
449 | 
450 |     with ThreadPoolExecutor(max_workers=2) as executor:
451 |         future_op = executor.submit(process_themes, t_path, args, op_files, "OP", y_episode, sr_episode)
452 |         future_ed = executor.submit(process_themes, t_path, args, ed_files, "ED", y_episode, sr_episode)
453 | 
454 |         for future in as_completed([future_op, future_ed]):
455 |             offset_list.extend(future.result())
456 | 
457 |     return offset_list
458 | 
459 | def time_to_frame(timesec, framerate, floor = True):
460 |     frame = timesec * framerate
461 |     if floor:
462 |         return math.floor(frame)
463 |     else:
464 |         return math.ceil(frame)
465 | 
466 | def frame_to_time(frame, framerate, floor = True):
467 |     if floor:
468 |         middle_frame = max(0, frame - 0.5)
469 |     else:
470 |         middle_frame = frame + 0.5
471 | 
472 |     secs = middle_frame / framerate
473 | 
474 |     return secs
475 | 
476 | def generate_search_pattern(window):
477 |     result = [window + 1]
478 | 
479 |     for i in range(1, window + 1):
480 |         result.append(window + 1 - i)
481 |         result.append(window + 1 + i)
482 | 
483 |     return result
484 | 
485 | def get_keyframe_frame(frame, snap_window_frames, clip_length, clip, core):
486 |     # Generate the keyframes in range with one more at the beginning since the first is always keyframe
487 |     # Scxvid needs to go sequentially and wwxd is inaccurate in testing
488 |     search_start_frame = max(frame - snap_window_frames - 1, 0)
489 |     search_end_frame = min(frame + snap_window_frames, clip_length) # Should already be one more than the last index
490 |     if search_start_frame >= search_end_frame:
491 |         return
492 |     trimmed_clip = clip[search_start_frame:search_end_frame]
493 |     try:
494 |         scxvid_clip = core.scxvid.Scxvid(trimmed_clip)
495 |     except Exception:
496 |         raise ImportError("You need to install Scxvid in vapoursynth plugins\n"
497 |                           "https://github.com/dubhater/vapoursynth-scxvid")
498 | 
499 |     search_pattern = generate_search_pattern(snap_window_frames)
500 |     for i in search_pattern:
501 |         if i <= 0 or i >= trimmed_clip.num_frames:
502 |             continue
503 |         actual_frame = frame - snap_window_frames - 1 + i
504 |         props = scxvid_clip.get_frame(i).props
505 |         scenechange = props._SceneChangePrev
506 |         if scenechange:
507 |             return actual_frame
508 | 
509 | def snap(args, offset_list):
510 |     try:
511 |         import vapoursynth as vs
512 |         from vapoursynth import core
513 |     except Exception:
514 |         raise ImportError("You need to install in vapoursynth for snapping\n"
515 |                           "https://github.com/vapoursynth/vapoursynth")
516 | 
517 |     try:
518 |         clip = core.ffms2.Source(source=args.input, cache=False)
519 |     except Exception:
520 |         raise ImportError("Could not load video or you haven't installed ffms2 in vapoursynth plugins for snapping\n"
521 |                           "https://github.com/FFMS/ffms2")
522 | 
523 |     print(f"Snapping chapters...", end="", flush=True)
524 | 
525 |     clip = core.resize.Bilinear(clip, 640, 360, format=vs.YUV420P8)
526 | 
527 |     clip_length = clip.num_frames
528 |     fps = float(clip.fps.numerator) / float(clip.fps.denominator)
529 | 
530 |     snapped_offsets = []
531 |     for offset in offset_list:
532 |         offset_frame =  time_to_frame(offset, fps, floor=False)
533 |         snap_window_frames = round(args.snap / 1000 * fps)
534 |         snap_frame = get_keyframe_frame(offset_frame, snap_window_frames, clip_length, clip, core)
535 | 
536 |         if snap_frame:
537 |             snapped_offsets.append(frame_to_time(snap_frame, fps, floor=True))
538 |         else:
539 |             snapped_offsets.append(offset)
540 | 
541 |     return snapped_offsets
542 | 
543 | def print_snapped_times(offset_list, file_duration, args):
544 |     print("\rSnapped times:        ", file=sys.stderr)
545 | 
546 |     # Episode duration snapping
547 |     ep_snapped_offsets = offset_list.copy()
548 |     if ep_snapped_offsets[0] < args.episode_snap:
549 |         ep_snapped_offsets[0] = 0
550 | 
551 |     if ep_snapped_offsets[-1] > (file_duration - args.episode_snap):
552 |         ep_snapped_offsets[-1] = file_duration
553 | 
554 |     print(f"{get_timestamp(ep_snapped_offsets[0])} -> {get_timestamp(ep_snapped_offsets[1])}", file=sys.stderr)
555 | 
556 |     if len(ep_snapped_offsets) == 4:
557 |         print(f"{get_timestamp(ep_snapped_offsets[2])} -> {get_timestamp(ep_snapped_offsets[3])}", file=sys.stderr)
558 | 
559 | def main():
560 |     args = parse_args()
561 |     t_path = os.path.join(args.work_path, ".themes")
562 | 
563 |     try:
564 |         validate_themes(args, t_path)
565 |         make_folders(args.work_path)
566 |         try_download(args, t_path)
567 |         extract_episode_audio(args)
568 |         offset_list = match_themes(args, t_path)
569 | 
570 |         file_duration = librosa.get_duration(path=str(args.episode_audio_path))
571 | 
572 |         offset_list.sort()
573 |         if chapter_validator(offset_list, file_duration):
574 |             if args.snap:
575 |                 print_seperator()
576 |                 offset_list = snap(args, offset_list)
577 |                 print_snapped_times(offset_list, file_duration, args)
578 |             generate_chapters(offset_list, file_duration, args)
579 |     finally:
580 |         if args.delete_themes:
581 |             shutil.rmtree(t_path)
582 |         try:
583 |             if args.episode_audio_path.endswith(".autochap.wav"):
584 |                 os.remove(args.episode_audio_path)
585 |         except Exception:
586 |             pass
587 | 
588 | if __name__ == "__main__":
589 |     main()


--------------------------------------------------------------------------------
/Auto_Chap/Changelog.md:
--------------------------------------------------------------------------------
 1 | # Auto_Chap Changelog
 2 | 
 3 | ## V3.0
 4 | - Make chapters frame-perfect by using `--snap` to snap them to scene changes within a certain millisecond window. This requires new dependencies [Vapoursynth](https://github.com/vapoursynth/vapoursynth), [ffm2](https://github.com/FFMS/ffms2), and [vapoursynth-scxvid](https://github.com/dubhater/vapoursynth-scxvid). If you are not using this feature then these dependencies do not need to be installed. It efficiently generates needed keyframes and is very fast compared to generating keyframes for the entire episode, taking only about 2 seconds.
 5 | - Filter search year of the series using `--year`.
 6 | - Filter for series released on or after that year using a negative number after `--year`. I added this because search for multi-season shows can sometimes give you the wrong one.
 7 | 
 8 | ## V3.1
 9 | - Fix for matches right at the beginning of the episode being off by a few seconds. This is quite important as it would sometimes skip the first cut after the OP such as in Apothecary Diaries episode 12.
10 | - Switch to using AnimeThemes' slug for theme names so that stuff like ED-TV for Frieren are now downloaded.
11 | - Added progress indicator messages.
12 | - More error handling for snap.
13 | - Printed snapped times should now be the same as final output chapter times.
14 | - Snap now defaults to 1000ms if no value added.
15 | 
16 | ## V3.2
17 | - Theme downloading now checks all versions of the theme so that stuff like Frieren's cour 2 ED that uses a different part of the same song can now be grabbed.
18 | - Redid progress indicators again with separators and better formatted messages.
19 | - Theme downloader now checks if a theme is actually present in the directory even if data.json says it should be and re-downloads if it is not.
20 | 
21 | ## V3.3
22 | - Updated to work on new animethemes api. Since "updated_at" times are no longer used, data.json now uses the "filename" data for each theme to keep track of if they need updating. This means every series will have to be updated and themes redownloaded to fit the new data.json format.
23 | 
24 | ## V3.4
25 | - Found out how to access "updated_at" again which should mean that it can detect more accurately when a theme has been updated. Updated data.json to store both "updated_at" and the animethemes "filename" but unfortunately this format change means all themes will have to be redownloaded again.
26 | 
27 | ## V4.0
28 | - Up to **2-3x faster theme matching** by using ffmpeg to extract the audio from mkv to a temp file, downsampling the audio, and using 2 threads for matching OP and ED in parallel. Chart creation is also done in parallel on a separate process
29 |     - The downsampling factor defaults to 8 and can be changed using `--downsample 32` for example. There are diminishing returns as you increase it.
30 |     - ffmpeg is required in PATH, it will now create a temp `.autochap.wav` file if the input is an mkv file. The temp file uses the first audio track and may be quite big.
31 | - Up to **2-5x faster theme downloading** by downloading in parallel. Speed up depends on how many themes in the series, internet speeds, etc.
32 |     - Themes to download in parallel defaults to 10 and can be changed using `--parallel-dl 2` for example
33 | - Reduced false positives where only the beginning of a theme is played by using the entire theme audio to match instead of first 30 seconds
34 | - Fixed `--year` to only match that specific year if the number is non-negative
35 | 
36 | ## V4.1
37 | - Up to **2-3x faster theme matching** than V4.0
38 |     - Speed-up applies more when there are more themes that need to be matched. Optimisation includes only loading the episode audio once at the beginning and using the audioread module to load theme files.
39 | - Fixed compatibility issue with macos caused by audioread loading.
40 | - Use `--score` to adjust how lenient the matching should be. Increase from default to reduce false positives. Decrease it to be more lenient. Score is y-axis in charts divided by the downsample factor.
41 | - Increased default score from 2000 to 4000
42 | - Increased default downsample factor from 8 to 32
43 | 
44 | ## V4.1a
45 | - Revert default score from 4000 back to 2000
46 | - Please give me feedback on the change in downsample default. If 32 causes any errors then let me know.
47 | 
48 | ## V4.2
49 | - Fixed non-matches for episodes where themes in the episode are slightly shorter such as when a theme is played at the very end of the episode. Now uses the beginning 90% of the theme for matching. The portion of the theme used can be changed using `--theme-portion`
50 | - Moved episode snap option for snapping chapters to the start and end of the episode to an optional argument using `--episode-snap` The default is still 4 seconds so that previews/endscreens 5 seconds long will get their own chapter.
51 | - Fixed typo in chapter output when there is only an ED.
52 | - Rearranged arguments and fixed typos.
53 | - Trimmed trailing whitespace.
54 | - Standardised error messages.


--------------------------------------------------------------------------------
/Auto_Chap/requirements.txt:
--------------------------------------------------------------------------------
1 | requests==2.31.0
2 | numpy==1.26.2
3 | scipy==1.11.4
4 | matplotlib==3.8.2
5 | librosa==0.10.1


--------------------------------------------------------------------------------
/Chapter_Snapper/Chapter_Snapper.py:
--------------------------------------------------------------------------------
  1 | # Chapter Snapper V2.1
  2 | import sys
  3 | import bisect
  4 | import math
  5 | import re
  6 | import time
  7 | import argparse
  8 | from pathlib import Path
  9 | 
 10 | def parse_args():
 11 |     parser = argparse.ArgumentParser(description="Snap chapters to nearest keyframe.")
 12 |     parser.add_argument(
 13 |         "--input", "-i", type=Path, required=True,
 14 |         help="Chapter file. Must be in simple format.",
 15 |     )
 16 |     
 17 |     parser.add_argument(
 18 |         "--keyframes", "-kf", type=Path, required=True,
 19 |         help="SCXvid keyframes. Try to have minimal mkv delay or it might not line up.",
 20 |     )
 21 |     
 22 |     parser.add_argument(
 23 |         "--output", "-o", type=Path,
 24 |         help="Output chapter file. Defaults to where input is.",
 25 |     )
 26 |     
 27 |     parser.add_argument(
 28 |         "--snap-ms", "-s", type=int, default=1000,
 29 |         help="How many milliseconds to consider snapping to. Defaults to 1000ms.",
 30 |     )
 31 |     
 32 |     parser.add_argument(
 33 |         "--fps", type=float, default=23.976,
 34 |         help="FPS of the video. Defaults to 23.976.",
 35 |     )
 36 |     
 37 |     args = parser.parse_args()
 38 |     
 39 |     if args.output is None:
 40 |         args.output = args.input.with_name(args.input.stem + "_snapped.txt")
 41 |         
 42 |     return args
 43 | 
 44 | def parse_srt_time(string):
 45 |     hours, minutes, seconds, milliseconds = map(int, re.match(r"(\d+):(\d+):(\d+)\.(\d+)", string).groups())
 46 |     return hours * 3600000 + minutes * 60000 + seconds * 1000 + milliseconds 
 47 |         
 48 | def parse_scxvid_keyframes(text):
 49 |     return [i-3 for i,line in enumerate(text.splitlines()) if line and line[0] == "i"]
 50 |         
 51 | def parse_keyframes(path):
 52 |     with open(path) as file_object:
 53 |         text = file_object.read()
 54 |     if text.find("# XviD 2pass stat file")>=0:
 55 |         frames = parse_scxvid_keyframes(text)
 56 |     else:
 57 |         raise Exception("Unsupported keyframes type")
 58 |     if 0 not in frames:
 59 |         frames.insert(0, 0)
 60 |     return frames
 61 |         
 62 | class Timecodes(object):
 63 |     TIMESTAMP_END = 1
 64 |     TIMESTAMP_START = 2
 65 | 
 66 |     def __init__(self, times, default_fps):
 67 |         super(Timecodes, self).__init__()
 68 |         self.times = times
 69 |         self.default_frame_duration = 1000.0 / default_fps if default_fps else None
 70 | 
 71 |     def get_frame_time(self, number, kind=None):
 72 |         if kind == self.TIMESTAMP_START:
 73 |             prev = self.get_frame_time(number-1)
 74 |             curr = self.get_frame_time(number)
 75 |             return prev + int(round((curr - prev) / 2.0))
 76 |         elif kind == self.TIMESTAMP_END:
 77 |             curr = self.get_frame_time(number)
 78 |             after = self.get_frame_time(number+1)
 79 |             return curr + int(round((after - curr) / 2.0))
 80 | 
 81 |         try:
 82 |             return self.times[number]
 83 |         except IndexError:
 84 |             if not self.default_frame_duration:
 85 |                 raise ValueError("Cannot calculate frame timestamp without frame duration")
 86 |             past_end, last_time = number, 0
 87 |             if self.times:
 88 |                 past_end, last_time = (number - len(self.times) + 1), self.times[-1]
 89 | 
 90 |             return int(round(past_end * self.default_frame_duration + last_time))
 91 | 
 92 |     def get_frame_number(self, ms, kind=None):
 93 |         if kind == self.TIMESTAMP_START:
 94 |             return self.get_frame_number(ms - 1) + 1
 95 |         elif kind == self.TIMESTAMP_END:
 96 |             return self.get_frame_number(ms - 1)
 97 | 
 98 |         if self.times and self.times[-1] >= ms:
 99 |             return bisect.bisect_left(self.times, ms)
100 | 
101 |         if not self.default_frame_duration:
102 |             raise ValueError("Cannot calculate frame for this timestamp without frame duration")
103 | 
104 |         if ms < 0:
105 |             return int(math.floor(ms / self.default_frame_duration))
106 | 
107 |         last_time = self.times[-1] if self.times else 0
108 |         return int((ms - last_time) / self.default_frame_duration) + len(self.times)
109 | 
110 |     @classmethod
111 |     def _convert_v1_to_v2(cls, default_fps, overrides):
112 |         # start, end, fps
113 |         overrides = [(int(x[0]), int(x[1]), float(x[2])) for x in overrides]
114 |         if not overrides:
115 |             return []
116 | 
117 |         fps = [default_fps] * (overrides[-1][1] + 1)
118 |         for start, end, fps in overrides:
119 |             fps[start:end + 1] = [fps] * (end - start + 1)
120 | 
121 |         v2 = [0]
122 |         for d in (1000.0 / f for f in fps):
123 |             v2.append(v2[-1] + d)
124 |         return v2
125 | 
126 |     @classmethod
127 |     def parse(cls, text):
128 |         lines = text.splitlines()
129 |         if not lines:
130 |             return []
131 |         first = lines[0].lower().lstrip()
132 |         if first.startswith("# timecode format v2"):
133 |             tcs = [x for x in lines[1:]]
134 |             return Timecodes(tcs, None)
135 |         elif first.startswith("# timecode format v1"):
136 |             default = float(lines[1].lower().replace("assume ", ""))
137 |             overrides = (x.split(",") for x in lines[2:])
138 |             return Timecodes(cls._convert_v1_to_v2(default, overrides), default)
139 |         else:
140 |             raise Exception("This timecodes format is not supported")
141 | 
142 |     @classmethod
143 |     def from_file(cls, path):
144 |         with open(path) as file:
145 |             return cls.parse(file.read())
146 | 
147 |     @classmethod
148 |     def cfr(cls, fps):
149 |         return Timecodes([], default_fps=fps)
150 |     
151 | def get_closest_kf(frame, keyframes):
152 |     idx = bisect.bisect_left(keyframes, frame)
153 |     if idx == len(keyframes):
154 |         return keyframes[-1]
155 |     if idx == 0 or keyframes[idx] - frame < frame - (keyframes[idx-1]):
156 |         return keyframes[idx]
157 |     return keyframes[idx-1]
158 | 
159 | def validate_chapters(chapter_read):
160 |     if not chapter_read[0].startswith("CHAPTER01="):
161 |         print("Invalid chapter format.", file=sys.stderr)
162 |         sys.exit(1)
163 |         
164 | def apply(chapter_lines, timecodes, keyframes_list, snap_ms):
165 |     for idx, line in enumerate(chapter_lines):
166 |         if idx % 2 == 0:
167 |             chapter_split = line.split("=")
168 |             start_ms = parse_srt_time(chapter_split[1])
169 |             start_frame = timecodes.get_frame_number(start_ms, timecodes.TIMESTAMP_START)
170 |             closest_frame = get_closest_kf(start_frame, keyframes_list)
171 |             closest_time = timecodes.get_frame_time(closest_frame, timecodes.TIMESTAMP_START)
172 |             
173 |             if abs(closest_time - start_ms) <= snap_ms and start_ms != 0:
174 |                 start_ms = max(0, closest_time)
175 |                 timesec = start_ms/1000
176 |                 timestamp = time.strftime(f"%H:%M:%S.{round(timesec%1*1000):03}", time.gmtime(timesec))
177 |                 chapter_lines[idx] = f"{chapter_split[0]}={timestamp}\n"
178 |     
179 | def main():
180 |     args = parse_args()
181 |     
182 |     timecodes = Timecodes.cfr(args.fps)
183 |     keyframes_list = parse_keyframes(args.keyframes)
184 |     
185 |     with open(args.input, "r") as chapter_file:
186 |         chapter_lines = chapter_file.readlines()
187 |         
188 |     validate_chapters(chapter_lines)
189 |     apply(chapter_lines, timecodes, keyframes_list, args.snap_ms)
190 |             
191 |     with open(args.output, "w") as out_file:
192 |         out_file.writelines(chapter_lines)    
193 |     
194 | if __name__ == "__main__":
195 |     main()


--------------------------------------------------------------------------------
/Converter/Converter.py:
--------------------------------------------------------------------------------
  1 | # Converter V1.6
  2 | 
  3 | import sys
  4 | import copy
  5 | from datetime import timedelta
  6 | import ass
  7 | from ass.data import Color as Color
  8 | 
  9 | def set_info(doc):
 10 |     doc.info["Title"] = "[SubsPlus+]"
 11 |     doc.info["YCbCr Matrix"] = "TV.709"
 12 |     try:
 13 |         doc.info.pop("LayoutResX")
 14 |         doc.info.pop("LayoutResY")
 15 |     except KeyError:
 16 |         pass
 17 | 
 18 | def sort_signs(obj):
 19 |     if "\\pos" in obj.text:
 20 |         return 1
 21 |     else:
 22 |         return 0
 23 | 
 24 | 
 25 | def detect_styles(doc):
 26 |     def gen_style(style, event):
 27 |         new_style_num = style_num
 28 | 
 29 |         # Identify style type
 30 |         if "\\pos" in event.text:
 31 |             type = "Caption"
 32 |         else:
 33 |             type = "Subtitle"
 34 | 
 35 |         try:
 36 |             # Already mapped
 37 |             style_name = style_map[event.style][type]
 38 |         except:
 39 |             # Do the mapping
 40 |             if type == "Subtitle":
 41 |                 # If the other type is already in, then it's a double type style
 42 |                 try:
 43 |                     style_map[event.style]["Caption"]
 44 |                     # 900 styles for duplicates
 45 |                     style_name = f"Subtitle-{style_num + 900}"
 46 |                 except KeyError:
 47 |                     style_name = f"Subtitle-{style_num}"
 48 |                     new_style_num += 1
 49 |                 new_style = copy.deepcopy(style)
 50 |                 new_style.name = style_name
 51 |                 subtitle_styles.append(new_style)
 52 |             elif type == "Caption":
 53 |                 try:
 54 |                     style_map[event.style]["Subtitle"]
 55 |                     style_name = f"Caption-{style_num + 900}"
 56 |                 except KeyError:
 57 |                     style_name = f"Caption-{style_num}"
 58 |                     new_style_num += 1
 59 |                 new_style = copy.deepcopy(style)
 60 |                 new_style.name = style_name
 61 |                 caption_styles.append(new_style)
 62 | 
 63 |             try:
 64 |                 style_map[event.style][type] = style_name
 65 |             except:
 66 |                 style_map[event.style] = {}
 67 |                 style_map[event.style][type] = style_name
 68 | 
 69 |         event.style = style_name
 70 |         return new_style_num
 71 | 
 72 |     # Should match with the Q style number
 73 |     style_num = 0
 74 |     subtitle_styles = []
 75 |     caption_styles = []
 76 |     # Key = Q style, Value = new style
 77 |     style_map = {}
 78 |     for style in doc.styles:
 79 |         for event in doc.events:
 80 |             if style.name == event.style:
 81 |                 style_num = gen_style(style, event)
 82 | 
 83 |     doc.styles = subtitle_styles
 84 |     doc.styles.extend(caption_styles)
 85 | 
 86 | 
 87 | def song_detection(doc):
 88 |     # Figure out which styles are songs
 89 | 
 90 |     # Pass 1: 9 consecutive lines (Additive)
 91 |     possible_song_styles = [style.name for style in doc.styles if "Subtitle" in style.name and style.fontsize == 40]
 92 | 
 93 |     consecutive_lines = 0
 94 |     styles_involved = set()
 95 |     song_styles = set()
 96 |     for event in doc.events:
 97 |         if event.style not in possible_song_styles:
 98 |             consecutive_lines = 0
 99 |             styles_involved = set()
100 |             continue
101 |         consecutive_lines += 1
102 |         styles_involved.add(event.style)
103 |         if consecutive_lines > 8 and event.style not in song_styles:
104 |             # song_styles.update(styles_involved)
105 |             song_styles.add(event.style)
106 | 
107 |     # Pass 2: Time based 7 consecutive (Additive)
108 | 
109 |     for style in possible_song_styles:
110 |         consecutive_events = 0
111 |         allowed_pauses = 1
112 |         last_event_end = timedelta(0)
113 |         for event in doc.events:
114 |             if event.style != style:
115 |                 continue
116 |             time_difference = event.start - last_event_end
117 |             if time_difference < timedelta(seconds=1):
118 |                 # if they are consecutive within 1 second
119 |                 consecutive_events += 1
120 |                 if consecutive_events == 7: # arbritary amount
121 |                     song_styles.add(style)
122 |                     break
123 |             elif time_difference < timedelta(seconds=5)and allowed_pauses > 0:
124 |                 allowed_pauses -= 1
125 |                 consecutive_events += 1
126 |                 if consecutive_events == 7: # arbritary amount
127 |                     song_styles.add(style)
128 |                     break
129 |             else:
130 |                 consecutive_events = 0
131 |                 allowed_pauses = 1
132 |             last_event_end = event.end
133 | 
134 |     # Pass 3: Fill Gap (Additive)
135 |     last_event_end = timedelta(seconds=-50)
136 |     for event in doc.events:
137 |         if event.style not in song_styles:
138 |             continue
139 |         time_difference = event.start - last_event_end
140 |         if time_difference < timedelta(seconds=10) and time_difference > timedelta(seconds=0):
141 |             # Gap found, look for song events inside
142 |             for inside_event in doc.events:
143 |                 if inside_event.style not in possible_song_styles:
144 |                     continue
145 |                 if not (inside_event.start >= last_event_end and inside_event.end <= event.start):
146 |                     continue
147 |                 print(f"Gap filled: {inside_event.text}")
148 |                 song_styles.add(inside_event.style)
149 |         last_event_end = event.end
150 | 
151 |     # Pass 4: Subset styles (Additive)
152 |     # Basically for when they do romaji and english at the same time
153 | 
154 |     remaining_contenders = [item for item in possible_song_styles if item not in song_styles]
155 | 
156 |     song_styles_times = [] # List of list (each style) of tuple: (event.start, event.end)
157 |     for style in song_styles:
158 |         style_times = []
159 |         for event in doc.events:
160 |             if event.style != style:
161 |                 continue
162 |             style_times.append((event.start, event.end))
163 |         song_styles_times.append(style_times)
164 | 
165 |     remaining_contenders_times = {}
166 |     for style in remaining_contenders:
167 |         style_times = []
168 |         for event in doc.events:
169 |             if event.style != style:
170 |                 continue
171 |             style_times.append((event.start, event.end))
172 |         remaining_contenders_times[style] = style_times
173 | 
174 |     # Find subsets
175 |     for remaining_style_key in remaining_contenders_times:
176 |         remaining_style_value = remaining_contenders_times[remaining_style_key]
177 |         for song_style in song_styles_times:
178 |             if all(item in song_style for item in remaining_style_value):
179 |                 print(f"By Subset: {remaining_style_key}")
180 |                 song_styles.add(remaining_style_key)
181 | 
182 |     # Update style
183 |     for style in doc.styles:
184 |         if style.name in song_styles:
185 |             style.name = style.name.replace("Subtitle", "Song")
186 | 
187 |     # Update events
188 |     for event in doc.events:
189 |         if event.style in song_styles:
190 |             event.style = event.style.replace("Subtitle", "Song")
191 | 
192 | 
193 | def restrictive_song_detection(doc):
194 |     song_style = ass.Style(name='Song', fontname='Sub Alegreya', fontsize=46, primary_color=Color(r=0xff, g=0xff, b=0xff, a=0x00), secondary_color=Color(r=0xff, g=0xff, b=0xff, a=0x00), outline_color=Color(r=0x72, g=0x0c, b=0x5f, a=0x00), back_color=Color(r=0x00, g=0x00, b=0x00, a=0xa0), bold=True, italic=False, underline=False, strike_out=False, scale_x=100.0, scale_y=100.0, spacing=0.1, angle=0.0, border_style=1, outline=2.2, shadow=0, alignment=8, margin_l=100, margin_r=100, margin_v=25, encoding=1)
195 |     possible_song_styles = [style.name for style in doc.styles if "Subtitle" in style.name and style.fontsize == 40]
196 | 
197 |     song_blocks = [] # Blocks of song events
198 | 
199 |     # Find all blocks of song events (More than 8 consecutive an8)
200 |     consecutive_lines = 0
201 |     events_involved = []
202 |     for event in doc.events:
203 |         if event.style not in possible_song_styles:
204 |             if consecutive_lines > 8:
205 |                 song_blocks.append(events_involved)
206 |             consecutive_lines = 0
207 |             events_involved = []
208 |             continue
209 |         consecutive_lines += 1
210 |         events_involved.append(event)
211 | 
212 |     if len(song_blocks) == 0:
213 |         return
214 | 
215 |     # Add Song style to styles
216 |     insert_pos = len(doc.styles)
217 |     for idx, style in enumerate(doc.styles):
218 |         if "Caption" in style.name:
219 |             insert_pos = idx
220 |             break
221 |     doc.styles.insert(insert_pos, song_style)
222 | 
223 |     for event in song_blocks[0]:
224 |         event.style = "Song"
225 | 
226 |     if len(song_blocks) >= 2:
227 |         # Skip doing middle song blocks. Since hopefully ED will be the last one
228 |         for event in song_blocks[-1]:
229 |             event.style = "Song"
230 | 
231 | 
232 | def manual_caption2song(doc):
233 |     pass
234 | 
235 | 
236 | def rescale_captions(doc):
237 |     for style in doc.styles:
238 |         if "Caption" in style.name:
239 |             style.fontsize = round(style.fontsize / 1.2 * 1.125)
240 | 
241 | 
242 | def restyler(doc):
243 |     for style in doc.styles:
244 |         if "Subtitle" in style.name:
245 |             style.primary_color = Color.from_ass("&H00FFFFFF")
246 |             style.secondary_color = Color.from_ass("&H00FFFFFF")
247 |             style.outline_color = Color.from_ass("&H00000000")
248 |             style.back_color = Color.from_ass("&HA0000000")
249 |             style.bold = True
250 |             style.outline = 2.4
251 |             style.shadow = 1
252 |             style.margin_l = 40
253 |             style.margin_r = 40
254 |             style.margin_v = 40
255 |             if style.fontname == "Swis721 BT":
256 |                 if style.fontsize == 40:
257 |                     style.alignment = 8
258 |                 if style.fontsize == 48 or style.fontsize == 40:
259 |                     style.fontsize = 50
260 |                 style.fontname = "SPOverrideF"
261 |             elif style.fontname == "Chiller" and style.fontsize < 63: # Change later
262 |                 # send to an8
263 |                 pass
264 |         elif "Song" in style.name:
265 |             style.fontname = "Sub Alegreya"
266 |             style.fontsize = 46
267 |             style.primary_color = Color.from_ass("&H00FFFFFF")
268 |             style.secondary_color = Color.from_ass("&H00FFFFFF")
269 |             style.outline_color = Color.from_ass("&H005F0C72")
270 |             style.back_color = Color.from_ass("&HA0000000")
271 |             style.bold = True
272 |             style.spacing = 0.1
273 |             style.outline = 2.2
274 |             style.shadow = 0
275 |             style.margin_l = 40
276 |             style.margin_r = 40
277 |             style.margin_v = 25
278 |             style.alignment = 8
279 |         elif "Caption" in style.name:
280 |             if style.primary_color.to_ass() == "&H0094FDFF":
281 |                 style.primary_color = Color.from_ass("&H0000FFFF")
282 |                 style.secondary_color = Color.from_ass("&H0000FFFF")
283 |             style.outline = 1
284 |             style.shadow = 1
285 |             style.margin_l = 20
286 |             style.margin_r = 20
287 |             style.margin_v = 20
288 |             # style.spacing = 0.01
289 | 
290 | 
291 | def fix_small_font_shenanigans(doc):
292 |     # Fix times where it is going subtitle font size 48 -> 40 -> 32 as an effect
293 | 
294 |     styles_by_name = {style.name: style for style in doc.styles}
295 | 
296 |     for i, event in enumerate(doc.events):
297 |         style = styles_by_name.get(event.style)
298 |         if not (style and style.alignment == 8 and "Subtitle" in style.name):
299 |             continue
300 | 
301 |         if i + 1 < len(doc.events):
302 |             next_event = doc.events[i + 1]
303 |             next_style = styles_by_name.get(next_event.style)
304 |             if not ("Subtitle" in next_style.name and next_style.fontsize < 40):
305 |                 continue
306 |             small_style = copy.deepcopy(style)
307 |             small_style.name = "Subtitle-Small"
308 |             small_style.alignment = 2
309 |             small_style.fontsize = 40
310 |             doc.styles.append(small_style)
311 |             event.style = "Subtitle-Small"
312 | 
313 | 
314 | 
315 | def main(inpath, outpath):
316 |     with open(inpath, encoding='utf_8_sig') as f:
317 |         doc = ass.parse(f)
318 | 
319 |     set_info(doc)
320 |     doc.events = sorted(doc.events, key=sort_signs)
321 |     detect_styles(doc)
322 |     song_detection(doc)
323 |     # restrictive_song_detection(doc)
324 |     restyler(doc)
325 |     fix_small_font_shenanigans(doc)
326 |     rescale_captions(doc)
327 | 
328 |     with open(outpath, "w", encoding='utf_8_sig') as f:
329 |         doc.dump_file(f)
330 | 
331 | 
332 | if __name__ == "__main__":
333 |     if len(sys.argv) != 3:
334 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
335 |     main(sys.argv[1], sys.argv[2])
336 | 


--------------------------------------------------------------------------------
/Hidive_Splitter/Hidive_Splitter.py:
--------------------------------------------------------------------------------
  1 | # Hidive_Splitter V2.1
  2 | import sys
  3 | import re
  4 | 
  5 | # match an ASS event with named groups and newline on the end
  6 | def line2dict(line):
  7 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*\n)")
  8 |     """pull fields out of ass event into dictionary
  9 |     takes string line as argument and returns dictionary or None if line is not an ASS event"""
 10 |     # print(line) # <- fun UnicodeEncodeErrors!
 11 |     match = line_pattern.match(line)
 12 |     if match:
 13 |         return {key: match.group(key) for key in line_pattern.groupindex}
 14 |     else:
 15 |         return None
 16 | 
 17 | def dict2line(d):
 18 |     return "{Format}: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}".format(**d)
 19 | 
 20 | def timestamp_to_centiseconds(timestamp):
 21 |     timestamp_split = timestamp.split(":")
 22 |     timestamp_sec_split = timestamp_split[2].split(".")
 23 |     hour = int(timestamp_split[0])
 24 |     minute = int(timestamp_split[1])
 25 |     second = int(timestamp_sec_split[0])
 26 |     centisecond = int(timestamp_sec_split[1])
 27 |     
 28 |     centiseconds = 360000*hour + 6000*minute + 100*second + centisecond
 29 |     return centiseconds
 30 | 
 31 | def generate_styles_dict(lines_list):
 32 |     # Used to compare if two styles are the same and can be merged
 33 |     styles_dict = {}
 34 |     for line in lines_list:
 35 |         if line.startswith("Style: "):
 36 |             style_split = line.split(",", 1) 
 37 |             style_name = style_split[0].replace("Style: ", "")
 38 |             style_info = style_split[1]
 39 |             styles_dict[style_name] = style_info
 40 |             
 41 |     return styles_dict
 42 | 
 43 | def split_subs(lines_list):
 44 |     out_lines = []
 45 |     for line in lines_list:
 46 |         if line.startswith("Dialogue: "):
 47 |             line_dict = line2dict(line)
 48 |             if not "Caption" in line_dict["Style"]:
 49 |                 split_lines(line_dict, out_lines)
 50 |             else:
 51 |                 out_lines.append(line)
 52 |         else: 
 53 |             out_lines.append(line)
 54 |             
 55 |     return out_lines
 56 |             
 57 | def combine_lines(lines_list, styles_dict):
 58 |     out_lines = []
 59 |     for line in lines_list:
 60 |         if not line.startswith("Dialogue: "):
 61 |             out_lines.append(line)
 62 |             continue        
 63 |         
 64 |         has_combined = False 
 65 |         for j in range(1, 6): # Check 5 previous elements
 66 |             check_line = out_lines[-j]
 67 |             if check_line.startswith("Dialogue: "):
 68 |                 check_line = line2dict(check_line)
 69 |             else:
 70 |                 continue
 71 |             has_combined = combine(line2dict(line), check_line, out_lines, styles_dict)
 72 |             if has_combined:
 73 |                 break
 74 |         
 75 |         if not has_combined:
 76 |             out_lines.append(line)
 77 |     
 78 |     return out_lines
 79 |     
 80 | def split_lines(line_dict, lines_list):
 81 |     # Split a particular line
 82 |     # Need to be inversed to preserve render positions
 83 |     result = []
 84 |     line_split = line_dict["Text"].split("\\N")
 85 |     original_style = line_dict["Style"]
 86 |     add_style = line_dict["Style"]
 87 |     add_line = []
 88 |     
 89 |     for line in line_split:
 90 |         if re.search(r"\\r(.*?)}", line):
 91 |             new_style = re.search(r"\\r(.*?)}", line).group(1)
 92 |             line = re.search(r"}(.*?){", line).group(1)
 93 |         else:
 94 |             new_style = original_style
 95 |         
 96 |         if add_style == new_style:
 97 |             add_line.append(line)
 98 |         else: # Style change
 99 |             # Add everything before as an event
100 |             to_add = line_dict.copy()
101 |             to_add["Style"] = add_style
102 |             to_add["Text"] = "\\N".join(add_line).rstrip("\n") + "\n"
103 |             result.append(dict2line(to_add))
104 |             
105 |             # Start a new event
106 |             add_style = new_style
107 |             add_line = [line]           
108 |     
109 |     to_add = line_dict.copy()     
110 |     to_add["Style"] = add_style
111 |     to_add["Text"] = "\\N".join(add_line).rstrip("\n") + "\n"
112 |     result.append(dict2line(to_add))
113 |     result.reverse()
114 |     lines_list.extend(result)
115 | 
116 | def combine(line, check, lines_list, styles_dict):
117 |     if check["Text"] == line["Text"] and styles_dict[check["Style"]] == styles_dict[line["Style"]] and check["End"] == line["Start"]:
118 |         line["Start"] = check["Start"] # Combine lines
119 |         for x in range(1, 8): # Arbritrary lookback amount
120 |             if lines_list[-x] == dict2line(check): 
121 |                 lines_list[-x] = dict2line(line)
122 |                 return True
123 |     return False
124 |             
125 | def reorder_simultaneous_lines(lines_list):
126 |     # Adjust which simulataneous lines are top and bottom
127 |     # Unused as it could mess with speaker order
128 |     mem_start = -1
129 |     mem_end = -1
130 |     for x, line in enumerate(lines_list):
131 |         if line.startswith("Dialogue: "):  
132 |             d = line2dict(line)
133 |             if d["Style"].startswith("Subtitle"):
134 |                 start = timestamp_to_centiseconds(d["Start"])
135 |                 end = timestamp_to_centiseconds(d["End"])
136 |                 
137 |                 if start == mem_start and end != mem_end:
138 |                     if mem_end < end:
139 |                         last_line = lines_list[x-1]
140 |                         lines_list[x-1] = lines_list[x]
141 |                         lines_list[x] = last_line
142 |                     
143 |                 mem_start = start
144 |                 mem_end = end
145 |         
146 | def main(inpath, outpath):
147 |     with open(inpath, encoding="utf-8") as infile:
148 |         lines_list = infile.readlines()
149 |         
150 |     styles_dict = generate_styles_dict(lines_list)
151 | 
152 |     subs_split = split_subs(lines_list)
153 |     final_lines = combine_lines(subs_split, styles_dict)
154 |             
155 |     with open(outpath, "w", encoding="utf-8") as outfile:
156 |         outfile.writelines(final_lines)
157 | 
158 | if __name__ == "__main__":
159 |     if len(sys.argv) != 3:
160 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
161 |         
162 |     main(sys.argv[1], sys.argv[2])


--------------------------------------------------------------------------------
/Overlap_Blue/Overlap_Blue.py:
--------------------------------------------------------------------------------
  1 | # Overlap_Blue V2.3
  2 | import sys
  3 | import re
  4 | 
  5 | DIALOGUE_FONT = "SPOverrideF" # Set to None to be any font
  6 | COLOR_HEX = "&H743E15&"
  7 | CENTISECOND_LENIENCY = 3 # Don't count as overlap if it is less than _ centiseconds
  8 | 
  9 | def line2dict(line):
 10 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*\n)")
 11 |     """pull fields out of ass event into dictionary
 12 |     takes string line as argument and returns dictionary or None if line is not an ASS event"""
 13 |     # print(line) # <- fun UnicodeEncodeErrors!
 14 |     match = line_pattern.match(line)
 15 |     if match:
 16 |         return {key: match.group(key) for key in line_pattern.groupindex}
 17 |     else:
 18 |         return None
 19 | 
 20 | def dict2line(d):
 21 |     return "{Format}: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}".format(**d)
 22 | 
 23 | def timestamp_to_centiseconds(timestamp):
 24 |     timestamp_split = timestamp.split(":")
 25 |     timestamp_sec_split = timestamp_split[2].split(".")
 26 |     hour = int(timestamp_split[0])
 27 |     minute = int(timestamp_split[1])
 28 |     second = int(timestamp_sec_split[0])
 29 |     centisecond = int(timestamp_sec_split[1])
 30 | 
 31 |     centiseconds = 360000*hour + 6000*minute + 100*second + centisecond
 32 |     return centiseconds
 33 | 
 34 | def generate_styles_list(lines_list):
 35 |     is_valid_style_bottom = []
 36 |     is_valid_style_top = []
 37 | 
 38 |     for line in lines_list:
 39 |         if line.startswith("Style: "):
 40 |             style_split = line.split(",")
 41 |             style_name = style_split[0].replace("Style: ", "")
 42 |             style_font = style_split[1]
 43 |             style_align = style_split[18]
 44 |             if (style_font == DIALOGUE_FONT or DIALOGUE_FONT == None) and style_align == "2":
 45 |                 is_valid_style_bottom.append(style_name)
 46 |             elif (style_font == DIALOGUE_FONT or DIALOGUE_FONT == None) and style_align == "8":
 47 |                 is_valid_style_top.append(style_name)
 48 | 
 49 |     return is_valid_style_bottom, is_valid_style_top
 50 | 
 51 | def apply_overlap_blue(d, end_times):
 52 |     start_time = timestamp_to_centiseconds(d.get("Start"))
 53 |     end_time = timestamp_to_centiseconds(d.get("End"))
 54 | 
 55 |     apply_on_index = 0
 56 |     for i in range(len(end_times) + 1):
 57 |         current_end_time = end_times[i] if i < len(end_times) else 0
 58 |         if start_time < (current_end_time - CENTISECOND_LENIENCY): # correct for within the frame (within 5 centiseconds for a bit extra there shouldn't be one frame gaps anyways)
 59 |             apply_on_index = i + 1
 60 |         else:
 61 |             try:
 62 |                 end_times[i] = end_time
 63 |             except IndexError:
 64 |                 end_times.append(end_time)
 65 |             break
 66 | 
 67 |     if apply_on_index % 2 != 0: # On odd indexes add overlap color
 68 |         color_tag = "{\\3c" + COLOR_HEX + "}"
 69 |         d["Text"] = color_tag + d["Text"]
 70 | 
 71 |     line = (dict2line(d))
 72 | 
 73 |     return line
 74 | 
 75 | def apply(lines_list, is_valid_style_bottom, is_valid_style_top):
 76 |     bottom_end_times = [0]
 77 |     top_end_times = [0]
 78 | 
 79 |     for line_idx, line in enumerate(lines_list):
 80 |         if not line.startswith("Dialogue: "):
 81 |             continue
 82 | 
 83 |         d = line2dict(line)
 84 | 
 85 |         # Top track
 86 |         if (d.get("Style") in is_valid_style_top and not any(keyword in d.get("Text", "") \
 87 |             for keyword in ["\\an", "\\pos", "\\move"]) or ("\\an8" in d.get("Text")) and "Subtitle" in d.get("Style")):
 88 |             lines_list[line_idx] = apply_overlap_blue(d, top_end_times)
 89 | 
 90 |         # Bottom track
 91 |         elif d.get("Style") in is_valid_style_bottom and not any(keyword in d.get("Text", "") \
 92 |             for keyword in ["\\an", "\\pos", "\\move"]):
 93 |             lines_list[line_idx] = apply_overlap_blue(d, bottom_end_times)
 94 | 
 95 | def main(inpath, outpath):
 96 |     with open(inpath, encoding="utf-8") as infile:
 97 |         lines_list = infile.readlines()
 98 | 
 99 |     is_valid_style_bottom, is_valid_style_top = generate_styles_list(lines_list)
100 |     apply(lines_list, is_valid_style_bottom, is_valid_style_top)
101 | 
102 |     with open(outpath, "w", encoding="utf-8") as outfile:
103 |         outfile.writelines(lines_list)
104 | 
105 | if __name__ == "__main__":
106 |     if len(sys.argv) != 3:
107 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
108 | 
109 |     main(sys.argv[1], sys.argv[2])
110 | 


--------------------------------------------------------------------------------
/P-Proper_Stutter/P-Proper_Stutter.py:
--------------------------------------------------------------------------------
  1 | # P-Proper_Stutter V2.3
  2 | import sys
  3 | import re
  4 | 
  5 | def line2dict(line):
  6 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*\n)")
  7 |     """pull fields out of ass event into dictionary
  8 |     takes string line as argument and returns dictionary or None if line is not an ASS event"""
  9 |     # print(line) # <- fun UnicodeEncodeErrors!
 10 |     match = line_pattern.match(line)
 11 |     if match:
 12 |         return {key: match.group(key) for key in line_pattern.groupindex}
 13 |     else:
 14 |         return None
 15 | 
 16 | def dict2line(d):
 17 |     return "{Format}: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}".format(**d)
 18 | 
 19 | def two_letter_fix(words):
 20 |     for x in range(20):
 21 |         words = re.sub(r"(?<!\w)([Ww])-([Ww]h)", "\\1h-\\2", words)
 22 |         words = re.sub(r"(?<!\w)([Ss])-([Ss]h)", "\\1h-\\2", words)
 23 |         words = re.sub(r"(?<!\w)([Tt])-([Tt]h)", "\\1h-\\2", words)
 24 |         
 25 |     return words
 26 | 
 27 | def one_letter_stutter(letters, a):
 28 |     if letters[1+a] == "-" and letters[0+a].lower() == letters[2+a].lower():
 29 |         letters[2+a] = letters[2+a].upper()
 30 |         # Y-y-y-you
 31 |         try:
 32 |             b = 3 + a
 33 |             while True:
 34 |                 if letters[b] == "-" and letters[b-1].lower() == letters[b+1].lower():
 35 |                     letters[b+1] = letters[b+1].upper()
 36 |                     b += 2
 37 |                 else:
 38 |                     break
 39 |         except IndexError:
 40 |             pass
 41 |     
 42 |     return letters
 43 | 
 44 | def two_letter_stutter(letters, a):
 45 |     letters[3+a] = letters[3+a].upper()
 46 |     # Th-th-th-this
 47 |     try:
 48 |         b = 5 + a
 49 |         while True:
 50 |             if letters[b] == "-" and letters[b-2].lower() == letters[b+1].lower() and letters[b-1].lower() == letters[b+2].lower():
 51 |                 letters[b+1] = letters[b+1].upper()
 52 |                 b += 3
 53 |             else:
 54 |                 break
 55 |     except IndexError:
 56 |         pass
 57 |     
 58 |     return letters
 59 |     
 60 | def blacklist_only_two_letters(word):
 61 |     pattern = r'\b\w*-\w{3,}\b'
 62 |     return bool(re.search(pattern, word))
 63 | 
 64 | def stutter_fix(word):
 65 |     try:
 66 |         a = word.find(next(filter(str.isalpha, word))) # Index of first alphabet character
 67 |     except:
 68 |         a = 0
 69 |     letters = [x for x in word] 
 70 |     try:
 71 |         if letters[0+a].isupper():
 72 |             if letters[1+a] == "-" and letters[0+a].lower() == letters[2+a].lower():
 73 |                 letters = one_letter_stutter(letters, a)
 74 |                     
 75 |             elif letters[2+a] == "-" and letters[0+a].lower() == letters[3+a].lower() and letters[1+a].lower() == letters[4+a].lower():
 76 |                 if blacklist_only_two_letters(word):
 77 |                     letters = two_letter_stutter(letters, a)
 78 |     except:
 79 |         pass
 80 |     new_word = "".join(letters)
 81 |     
 82 |     return new_word
 83 | 
 84 | def stutter_opperations(words):
 85 |     words = words.replace("\\N", " \\N ")
 86 |     words = words.replace("}", "} ")
 87 |     words = two_letter_fix(words)
 88 |     words_ls = words.split(" ") # W-well,|not|like|I|have|much|choice!
 89 |     new_line = []
 90 |     for word in words_ls:
 91 |         # S..so -> S... So or S-So
 92 |         m = re.search(r"(?<!\w)(\w)\.\.(\w)", word)
 93 |         try:
 94 |             if m.group(1) == m.group(2):
 95 |                 word = re.sub(r"(?<!\w)(\w)\.\.(\w)", "\\1..\\2", word) # "\\1... \\2" or "\\1-\\2" or "\\1..\\2"
 96 |             elif m.group(1).lower() == m.group(2):
 97 |                 g2 = str(m.group(1).upper())
 98 |                 word = re.sub(r"(?<!\w)(\w)\.\.(\w)", f"\\1..{g2}", word) # "\\1... {g2}" or "\\1-{g2}" or "\\1..{g2}"
 99 |         except:
100 |             pass
101 |         
102 |         # Th..this -> Th..This or Th-This
103 |         m = re.search(r"(?<!\w)(\w{2})\.\.(\w{2})", word)
104 |         try:
105 |             if m.group(1) == m.group(2):
106 |                 word = re.sub(r"(?<!\w)(\w{2})\.\.(\w{2})", "\\1..\\2", word) # "\\1... \\2" or "\\1-\\2" or "\\1..\\2"
107 |             elif m.group(1).lower() == m.group(2):
108 |                 g2 = str(m.group(1).capitalize())
109 |                 word = re.sub(r"(?<!\w)(\w{2})\.\.(\w{2})", f"\\1..{g2}", word)  # "\\1... {g2}" or "\\1-{g2}" or "\\1..{g2}"
110 |         except:
111 |             pass
112 |         
113 |         if "-" in word:
114 |             new_line.append(stutter_fix(word))
115 |         else:
116 |             new_line.append(word)
117 |         
118 |     new_line = " ".join(new_line)
119 |     new_line = new_line.replace(" \\N ", "\\N")
120 |     new_line = new_line.replace("} ", "}")
121 |     
122 |     return new_line
123 | 
124 | def apply(lines_list):
125 |     for line_idx, line in enumerate(lines_list):
126 |         if line.startswith("Dialogue: "):
127 |             d = line2dict(line)
128 |             d["Text"] = stutter_opperations(d["Text"])   
129 |             lines_list[line_idx] = dict2line(d)
130 |        
131 | def main(inpath, outpath):
132 |     with open(inpath, encoding="utf-8") as infile:
133 |         lines_list = infile.readlines()
134 | 
135 |     apply(lines_list)
136 |     
137 |     with open(outpath, "w", encoding="utf-8") as outfile:
138 |         outfile.writelines(lines_list)
139 |     
140 | if __name__ == "__main__":
141 |     if len(sys.argv) != 3:
142 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
143 | 
144 |     main(sys.argv[1], sys.argv[2])
145 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SubsPlus+ Automation Scripts
  2 | Automation scripts made for fixing Hidive subtitles and other functions.
  3 | 
  4 | ## Scripts
  5 | Check the wiki tab for additional information. I'm not an experienced programmer, so constructive criticism is appreciated.
  6 | 
  7 | These scripts are first and foremost made for SubsPlus+ releases and Hidive subtitles, so don't expect the more specific scripts to work outside of that context.
  8 | 
  9 | ---
 10 | 
 11 | ### Auto_Chap
 12 | Generate chapters by matching themes downloaded from [AnimeThemes](https://animethemes.moe) to the episode.
 13 | 
 14 | It creates a `.themes` folder with the downloaded themes for future runs and charts showing where the themes matched in the episode. Chapters will not be generated if no matches or more than 2 themes are matched, or 2 themes are in the same half of the episode. Themes tagged with `Transition` or `Over` on animethemes will not be downloaded and non-conventional themes like Oshi no Ko will likely not work as intended.
 15 | 
 16 | Note: You should mux with the outputed chapter file with mkvmerge but if you want to manually input chapters then get them from the output chapter file since the times in the logs are not final.
 17 | 
 18 | #### Dependencies
 19 | ```
 20 | pip install -r requirements.txt
 21 | ```
 22 | 
 23 | ffmpeg is required to be installed in PATH.
 24 | 
 25 | If you want frame-perfect chapters using `--snap`, keyframe generation requires:
 26 | - https://github.com/vapoursynth/vapoursynth
 27 | - https://github.com/FFMS/ffms2 (Install in vapoursynth plugins)
 28 | - https://github.com/dubhater/vapoursynth-scxvid (Install in vapoursynth plugins)
 29 | 
 30 | #### Usage
 31 | ```console
 32 | $ python Auto_Chap.py --help
 33 | usage: Auto_Chap.py [-h] --input INPUT [--output OUTPUT] [--search-name SEARCH_NAME] [--year YEAR]
 34 |                     [--snap [SNAP]] [--episode-snap EPISODE_SNAP] [--score SCORE]
 35 |                     [--theme-portion THEME_PORTION] [--downsample DOWNSAMPLE] [--parallel-dl PARALLEL_DL]
 36 |                     [--work-path WORK_PATH] [--delete-themes] [--charts]
 37 | 
 38 | Automatic anime chapter generator using AnimeThemes.
 39 | 
 40 | options:
 41 |   -h, --help            show this help message and exit
 42 |   --input INPUT, -i INPUT
 43 |                         Video/Audio file.
 44 |   --output OUTPUT, -o OUTPUT
 45 |                         Output chapter file. Defaults to where the episode is.
 46 |   --search-name SEARCH_NAME, -s SEARCH_NAME
 47 |                         Search to pass to animethemes.moe Example: Spy Classroom Season 2. To only use
 48 |                         themes that are already downloaded, don't add this argument.
 49 |   --year YEAR           Release year to help filter the search. Put the negative number to allow that year
 50 |                         or later.
 51 |   --snap [SNAP]         Millisecond window to snap to nearest keyframe for frame-perfect chapters.
 52 |                         Efficiently generates necessary keyframes from video. Defaults to 1000ms if no value
 53 |                         added. Values higher than about 1000 currently crash.
 54 |   --episode-snap EPISODE_SNAP
 55 |                         Window in seconds to snap chapters to the start or end of the episode. This gets
 56 |                         applied at the very end. Defaults to 4.
 57 |   --score SCORE         Score required for a theme to be accepted as a match. Increase it to reduce false
 58 |                         positives, decrease it to be more lenient. Score is y-axis in charts divided by
 59 |                         downsample factor. Defaults to 2000.
 60 |   --theme-portion THEME_PORTION
 61 |                         Portion of a theme required in the episode to be a match. Keep below 1 so that it
 62 |                         can still match themes that get slightly cut off. Defaults to 0.9.
 63 |   --downsample DOWNSAMPLE
 64 |                         Factor to downsample audio when matching, higher means speedier potentially with
 65 |                         lower accuracy. Defaults to 32.
 66 |   --parallel-dl PARALLEL_DL
 67 |                         How many themes to download in parallel. Defaults to 10.
 68 |   --work-path WORK_PATH, -w WORK_PATH
 69 |                         Place to create a .themes folder for storing persistent information per series.
 70 |                         Defaults to where the episode is.
 71 |   --delete-themes, -d   Delete the themes and charts after running.
 72 |   --charts, -c          Make charts of where themes are matched in the episode. They can almost double
 73 |                         processing time in some cases though.
 74 | ```
 75 | 
 76 | #### Examples
 77 | Generate chapters for an episode.
 78 | ```
 79 | python Auto_Chap.py -i "Dangers in My Heart - 01.mkv" -s "Dangers in My Heart Season 1"
 80 | ```
 81 | 
 82 | Generate chapters without a persistant `.themes` folder.
 83 | ```
 84 | python Auto_Chap.py -i "Dangers in My Heart - 01.mkv" -s "Dangers in My Heart Season 1" -d
 85 | ```
 86 | 
 87 | Work in a project folder and output to chapter path.
 88 | ```
 89 | python Auto_Chap.py -i "Dangers in My Heart - 01.mkv" -s "Dangers in My Heart Season 1" -w "Projects/DMH" -o "Projects/DMH/Chapters/01.chapter.txt"
 90 | ```
 91 | 
 92 | Run with themes predownloaded (in `.ogg` and in `.themes` folder).
 93 | ```
 94 | python Auto_Chap.py -i "Dangers in My Heart - 01.mkv"
 95 | ```
 96 | 
 97 | Filter search for shows that released on or after 2023.
 98 | ```
 99 | python Auto_Chap.py -i "Shangri-la Frontier - 01.mkv" -s "Shangri-la frontier Season 1" --year -2023
100 | ```
101 | 
102 | Snap to nearest keyframe within 1000ms for frame-perfect chapters. Snap values that are too high don't work for some reason but 1000 should be plenty.
103 | ```
104 | python Auto_Chap.py -i "Dangers in My Heart - 01.mkv" -s "Dangers in My Heart Season 1" --snap 1000
105 | ```
106 | 
107 | Chapter names can be changed at the top of the script.
108 | ```python
109 | PRE_OP = "Prologue"
110 | OPENING = "Opening"
111 | EPISODE = "Episode"
112 | ENDING = "Ending"
113 | POST_ED = "Epilogue"
114 | ```
115 | 
116 | ---
117 | 
118 | ### Chapter_Snapper
119 | Snap chapter file to nearest keyframe using existing scxvid keyframes. Chapter file must be in simple chapter format. Useful if you already have keyframes but if not then use the keyframe generation in Auto_Chap.
120 | 
121 | #### Usage
122 | ```console
123 | $ python Chapter_Snapper.py -h
124 | usage: Chapter_Snapper.py [-h] --input INPUT --keyframes KEYFRAMES [--output OUTPUT] [--snap-ms SNAP_MS]
125 |                                [--fps FPS]
126 | 
127 | Snap chapters to nearest keyframe.
128 | 
129 | options:
130 |   -h, --help            show this help message and exit
131 |   --input INPUT, -i INPUT
132 |                         Chapter file. Must be in simple format.
133 |   --keyframes KEYFRAMES, -kf KEYFRAMES
134 |                         SCXvid keyframes. Try to have minimal mkv delay or it might not line up.
135 |   --output OUTPUT, -o OUTPUT
136 |                         Output chapter file. Defaults to where input is.
137 |   --snap-ms SNAP_MS, -s SNAP_MS
138 |                         How many milliseconds to consider snapping to. Defaults to 1000ms.
139 |   --fps FPS             FPS of the video. Defaults to 23.976.
140 | ```
141 | 
142 | #### Examples
143 | Generate with increased snap window.
144 | ```
145 | python Chapter_Snapper.py -i "autochap.txt" -kf "keyframes.txt" -s 2000 -o "Project/chapter_snapped.txt"
146 | ```
147 | 
148 | ---
149 | 
150 | ### Converter
151 | All the other scripts expect to be run off of the output of this. Detects what the new Hidive Q styles should be (Subtitle, Caption, Song) and restyles them. Song detection is very difficult now, there may be false positives or missed insert-songs so keep an eye on it. You can switch between the advanced song detection method and the restrictive one by commenting the other one out.
152 | 
153 | #### Dependencies
154 | ```
155 | pip install ass
156 | ```
157 | 
158 | #### Usage
159 | Accepts new Erai-raws Hidive scripts (where the styles are all named Q#) and multi-downloader-nx Hidive script using settings `--fontSize 48 --originalFontSize false`
160 | ```
161 | python Converter.py infile.ass outfile.ass
162 | ```
163 | 
164 | ---
165 | 
166 | ### Overlap_Blue
167 | Change outline color of a line when it is overlapping another. Supports top and bottom track but only if they are defined in styles not inline. Works well for Crunchyroll and Hidive shows.
168 | 
169 | #### Usage
170 | ```
171 | python Overlap_Blue.py infile.ass outfile.ass
172 | ```
173 | 
174 | Change settings at the top of the script. Specify dialogue font if you don't want it to color an alternative dialogue font. CENTISECOND_LENIENCY means don't count as overlap if it is less than _ centiseconds. Any value above 0 has a chance of missing overlaps if the frame advances between the two lines. But some scripts may have two lines within the same frame that get misinterpreted as an overlap if no Leniency given.
175 | ```python
176 | DIALOGUE_FONT = "SPOverrideF"
177 | COLOR_HEX = "&H743E15&"
178 | CENTISECOND_LENIENCY = 3
179 | ```
180 | 
181 | ---
182 | 
183 | ### Hidive_Splitter
184 | Split and combine lines so that each line has its own event. This prevents subtitles from shifting positions. Note: Some orders will be reversed to preserve rendering position. It can be enabled on [multi-downloader-nx](https://github.com/anidl/multi-downloader-nx) by adding `--combineLines` to the arguments but running this on top of them is still safe.
185 | 
186 | #### Usage
187 | ```
188 | python Hidive_Splitter.py infile.ass outfile.ass
189 | ```
190 | 
191 | ---
192 | 
193 | ### P-Proper_Stutter
194 | Capitalize and fix stutters:
195 | - F-f-find -> F-F-Find
196 | - Sh-she -> Sh-She
197 | - W-when -> Wh-When
198 | - S..so -> S..So (Can be configure to be S-So or S... So)
199 | 
200 | #### Usage
201 | ```
202 | python P-Proper_Stutter.py infile.ass outfile.ass
203 | ```
204 | 
205 | ---
206 | 
207 | ### Regex_Stuff
208 | - Add fade to song styles
209 | - Fix em-dash
210 | - Fix triple dialogue lines
211 | - !? -> ?!
212 | - Increase layers on non 'caption' styles
213 | - Fix incorrectly styled 'caption's
214 | 
215 | #### Usage
216 | ```
217 | python Regex_Stuff.py infile.ass outfile.ass
218 | ```
219 | 
220 | ---
221 | 
222 | ### Resampler
223 | Simple script resampler from 720p to 1080p with rounding. Only works for these simple Hidive subtitles, don't try to use this in other cases.
224 | 
225 | 
226 | #### Dependencies
227 | ```
228 | pip install ass
229 | ```
230 | 
231 | #### Usage
232 | ```
233 | python Resampler.py infile.ass outfile.ass
234 | ```
235 | 
236 | ---
237 | 
238 | ### Sign_DeOverlap
239 | Send dialogue to the top if it covers a sign. It approximates the bounding boxes of the dialogue and the signs then compares them. Only works with converted Hidive subtitles, don't try to use this in other cases.
240 | 
241 | #### Dependencies
242 | ```
243 | pip install "pillow>=10.1.0"
244 | ```
245 | 
246 | #### Usage
247 | ```
248 | python Sign_DeOverlap.py infile.ass outfile.ass
249 | ```
250 | 
251 | ---
252 | 
253 | ### Style_Cleanup
254 | Simplify styles to look nicer internally, does not affect how it renders. Original code and idea from [Animorphs](https://github.com/Animorphs) modified into a standalone script.
255 | 
256 | #### Usage
257 | ```
258 | python Style_Cleanup.py infile.ass outfile.ass
259 | ```
260 | 
261 | ---
262 | 
263 | ## Acknowledgements
264 | - [hiisi13/Audio Offset](https://github.com/hiisi13/audio-offset-finder) for finding themes within an episode
265 | - [AnimeThemes](https://animethemes.moe) api for getting themes
266 | - [iamevn](https://gist.github.com/iamevn/6d796a1c8296ac325da4545fd20caf2f) ass parsing code
267 | - [tp7/Prass](https://github.com/tp7/Prass) keyframe snapping code for Chapter_Snapper
268 | - [Myaamori/keyframes.py](https://gist.github.com/Myaamori/dfb0030fd4ee44364ca3b0c2c9c9b4aa) inspiration for auto_chap keyframes generation
269 | - [FichteFoll/snap_scenechanges.py](https://gist.github.com/FichteFoll/9184e0ef75df71d7da184c485caf5266) functions and logic for converting frames to time, etc. in auto_chap snapping
270 | - [Animorphs](https://github.com/Animorphs) original code for Style_Cleanup and general help with other scripts


--------------------------------------------------------------------------------
/Regex_Stuff/Regex_Stuff.py:
--------------------------------------------------------------------------------
  1 | # Regex Stuff V2.5
  2 | import sys
  3 | import re
  4 | 
  5 | def line2dict(line):
  6 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*\n)")
  7 |     """pull fields out of ass event into dictionary
  8 |     takes string line as argument and returns dictionary or None if line is not an ASS event"""
  9 |     # print(line) # <- fun UnicodeEncodeErrors!
 10 |     match = line_pattern.match(line)
 11 |     if match:
 12 |         return {key: match.group(key) for key in line_pattern.groupindex}
 13 |     else:
 14 |         return None
 15 | 
 16 | def dict2line(d):
 17 |     return "{Format}: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}".format(**d)
 18 | 
 19 | def fix_incorrect_songs_style(lines_list):
 20 |     # Hidive-downloader-nx sometimes incorrect assigns styles, this is a problem with the downloader not hidive.
 21 | 
 22 |     DEFAULT_CAPTIONS_STYLE = "Style: Caption-Default,Swis721 BT,40,&H00FFFFFF,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,1,2,2,20,20,20,1\n"
 23 | 
 24 |     add_default_style = False
 25 |     for line_idx, line in enumerate(lines_list):
 26 |         if line.startswith("Dialogue:"):
 27 |             d = line2dict(line)
 28 |             if "Song" in d["Style"] and "\\pos" in d["Text"]:
 29 |                 d["Style"] = "Caption-Default"
 30 |                 add_default_style = True
 31 |             lines_list[line_idx] = dict2line(d)
 32 | 
 33 |     if add_default_style:
 34 |         line_idx = 0
 35 |         while lines_list[line_idx] != "[V4+ Styles]\n":
 36 |             line_idx += 1
 37 | 
 38 |         while lines_list[line_idx] != "\n":
 39 |             line_idx += 1
 40 | 
 41 |         lines_list.insert(line_idx, DEFAULT_CAPTIONS_STYLE)
 42 | 
 43 | def apply_fix(lines_list, modification_func):
 44 |     for line_idx, line in enumerate(lines_list):
 45 |         if line.startswith("Dialogue:"):
 46 |             d = line2dict(line)
 47 |             lines_list[line_idx] = dict2line(modification_func(d))
 48 | 
 49 | def add_song_fad(d):
 50 |     if "Song" in d["Style"] and "{\\fad(150,150)}" not in d["Text"]:
 51 |         d["Text"] = "{\\fad(150,150)}" + d["Text"]
 52 |     return d
 53 | 
 54 | def fix_em_dash(d):
 55 |     # Dash with a space before it is usually em-dash for punctuation.
 56 |     # The space should be removed before the general replace in the last line
 57 |     if not (re.search(r"\s-[A-Za-z]", d["Text"]) or re.search(r"\}-[A-Za-z]", d["Text"]) or d["Text"].startswith("-")): # Not Caption line that is -Title of Something-
 58 |         d["Text"] = d["Text"].replace("--", "—")
 59 |         if not d["Style"].startswith("Caption"):
 60 |             d["Text"] = d["Text"].replace(" - ", "—")
 61 |             d["Text"] = d["Text"].replace(" -\\N", "—\\N")
 62 |             d["Text"] = d["Text"].replace("-\\N", "—\\N")
 63 |             #d["Text"] = d["Text"].replace(" -([\"“”'’])", "—\1")
 64 |             d["Text"] = re.sub(" -([\"“”'’])", "—\\1", d["Text"])
 65 |             d["Text"] = re.sub("-([\"“”'’])", "—\\1", d["Text"])
 66 |         d["Text"] = d["Text"].replace("-?", "—?")
 67 |         d["Text"] = d["Text"].replace("-!", "—!")
 68 |         d["Text"] = d["Text"].replace(" - \n", "— \n")
 69 |         d["Text"] = d["Text"].replace(" -\n", "—\n")
 70 |         d["Text"] = d["Text"].replace("- \n", "— \n")
 71 |         d["Text"] = d["Text"].replace("-\n", "—\n")
 72 |         d["Text"] = d["Text"].replace("\\N- ", "\\N—")
 73 |         # d["Text"] = d["Text"].replace("\\N-", "\\N—") # Could be false positive
 74 |         if not d["Style"].startswith("Caption"):
 75 |             d["Text"] = d["Text"].replace("- ", "— ")
 76 |     return d
 77 | 
 78 | def fix_long_lines(d):
 79 |     # If 3 liner or more then get rid of new lines chars
 80 |     # Important that this program is run after Hidive splitter
 81 | 
 82 |     if not "}-" in d["Text"]: # Caption line that is -Title of Something-
 83 |         if not d["Style"].startswith("Caption"):
 84 |             pattern = re.compile(r"\\N")
 85 |             occurrences = pattern.findall(d["Text"])
 86 |             if len(occurrences) >= 2:
 87 |                 d["Text"] = re.sub(r"\\N", " ", d["Text"])
 88 | 
 89 |     return d
 90 | 
 91 | def fix_interrobang(d):
 92 |     # Replace !? with ?!
 93 | 
 94 |     d["Text"] = re.sub(r"(?<![?!])(!\?)", r"?!;q;", d["Text"])
 95 |     for i in range(20): # For odd ammounts like !?! -> ?!?
 96 |         d["Text"] = re.sub(r"(?<=[?!])(;q;[!?])", r"?;e;", d["Text"])
 97 |         d["Text"] = re.sub(r"(?<=[?!])(;e;[!?])", r"!;q;", d["Text"])
 98 |     d["Text"] = d["Text"].replace(";q;", "")
 99 |     d["Text"] = d["Text"].replace(";e;", "")
100 | 
101 |     return d
102 | 
103 | def fix_symbols(d):
104 |     d["Text"] = re.sub("‘", "'", d["Text"])
105 |     d["Text"] = re.sub("’", "'", d["Text"])
106 |     d["Text"] = re.sub("“", '"', d["Text"])
107 |     d["Text"] = re.sub("”", '"', d["Text"])
108 | 
109 |     return d
110 | 
111 | def fix_layers(d):
112 |     if not d["Style"].startswith("Caption"):
113 |         d["Layer"] = "10"
114 | 
115 |     return d
116 | 
117 | def main(inpath, outpath):
118 |     lines_list = list()
119 |     with open(inpath, encoding="utf-8") as infile:
120 |         lines_list = infile.readlines()
121 | 
122 |     # fix_incorrect_songs_style(lines_list)
123 |     apply_fix(lines_list, add_song_fad)
124 |     apply_fix(lines_list, fix_em_dash)
125 |     apply_fix(lines_list, fix_long_lines)
126 |     apply_fix(lines_list, fix_interrobang)
127 |     apply_fix(lines_list, fix_symbols)
128 |     apply_fix(lines_list, fix_layers)
129 | 
130 |     with open(outpath, "w", encoding="utf-8") as outfile:
131 |         outfile.writelines(lines_list)
132 | 
133 | if __name__ == "__main__":
134 |     if len(sys.argv) != 3:
135 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
136 | 
137 |     main(sys.argv[1], sys.argv[2])


--------------------------------------------------------------------------------
/Resampler/Resampler.py:
--------------------------------------------------------------------------------
 1 | # Resampler V1.2
 2 | 
 3 | import sys
 4 | import ass
 5 | import re
 6 | 
 7 | RESAMPLE_FACTOR = 1.5 # (720p to 1080p)
 8 | 
 9 | def change_resolution(doc):
10 |     doc.info["PlayResX"] = "1920"
11 |     doc.info["PlayResY"] = "1080"
12 | 
13 | def resample_styles(doc):
14 |     for style in doc.styles:
15 |         style.fontsize = round(style.fontsize * RESAMPLE_FACTOR, 0)
16 |         if not (style.spacing == 0.1 or style.spacing == 0.01):
17 |             style.spacing = round(style.spacing * RESAMPLE_FACTOR, 1)
18 |         style.outline = round(style.outline * RESAMPLE_FACTOR, 1)
19 |         style.shadow = round(style.shadow * RESAMPLE_FACTOR, 1)
20 |         style.margin_l = round(style.margin_l * RESAMPLE_FACTOR, 0)
21 |         style.margin_r = round(style.margin_r * RESAMPLE_FACTOR, 0)
22 |         style.margin_v = round(style.margin_v * RESAMPLE_FACTOR, 0)
23 | 
24 | 
25 | def resample_lines(doc):
26 |     for event in doc.events:
27 |         event.text = re.sub(r"\\fs([\d.]+)", lambda m: f"\\fs{round(float(m.group(1)) * RESAMPLE_FACTOR)}", event.text)
28 |         event.text = re.sub(r"\\pos\(([\d.]+),([\d.]+)\)",
29 |                             lambda m: f"\\pos({round(float(m.group(1)) * RESAMPLE_FACTOR)},{round(float(m.group(2)) * RESAMPLE_FACTOR)})",
30 |                             event.text)
31 | 
32 | 
33 | def main(inpath, outpath):
34 |     with open(inpath, encoding='utf_8_sig') as f:
35 |         doc = ass.parse(f)
36 | 
37 |     change_resolution(doc)
38 |     resample_styles(doc)
39 |     resample_lines(doc)
40 | 
41 |     with open(outpath, "w", encoding='utf_8_sig') as f:
42 |         doc.dump_file(f)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     if len(sys.argv) != 3:
47 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
48 |     main(sys.argv[1], sys.argv[2])


--------------------------------------------------------------------------------
/Sign_DeOverlap/Sign_DeOverlap.py:
--------------------------------------------------------------------------------
  1 | # Sign_DeOverlap V1.6
  2 | 
  3 | import re
  4 | import sys
  5 | from PIL import Image, ImageDraw, ImageFont
  6 | 
  7 | '''
  8 | - Uses logic based on an8 in style and an8 inline being different so run before style_cleanup.
  9 | - Also assumes an7 inline means normal and no inline an tag means an8. Change behaviour in make_caption_areas() if needed
 10 | - Assumes lines are sorted by start time
 11 | '''
 12 | 
 13 | scale = 1 # Change to 1.5 for 1080p
 14 | widen_ratio = 1.01 # Based on screen height
 15 | line_switching_threshold = 500 # Centiseconds threshold to send an2 lines sandwiched between 2 an8 lines to an8
 16 | 
 17 | script_width = int(1280 * scale)
 18 | script_height = int(720 * scale)
 19 | 
 20 | # No font so approximation
 21 | try:
 22 |     dialogue_font = ImageFont.load_default(50 * scale * 0.85)
 23 | except Exception:
 24 |     print("Make sure you have updated to >= 10.1.0", file=sys.stderr)
 25 |     sys.exit(1)
 26 | dialogue_spacing = 10 * scale
 27 | 
 28 | margin_v = 40 * scale
 29 | 
 30 | img = Image.new('RGB', (script_width, script_height), color='white')
 31 | draw = ImageDraw.Draw(img)
 32 | 
 33 | def line2dict(line):
 34 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*\n)")
 35 |     match = line_pattern.match(line)
 36 |     if match:
 37 |         return {key: match.group(key) for key in line_pattern.groupindex}
 38 |     else:
 39 |         return None
 40 | 
 41 | def style2dict(line):
 42 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Name>[^,]*), ?(?P<Fontname>[^,]*), ?(?P<Fontsize>[^,]*), ?(?P<PrimaryColour>[^,]*), ?(?P<SecondaryColour>[^,]*), ?(?P<OutlineColour>[^,]*), ?(?P<BackColour>[^,]*), ?(?P<Bold>[^,]*), ?(?P<Italic>[^,]*), ?(?P<Underline>[^,]*), ?(?P<StrikeOut>[^,]*), ?(?P<ScaleX>[^,]*), ?(?P<ScaleY>[^,]*), ?(?P<Spacing>[^,]*), ?(?P<Angle>[^,]*), ?(?P<BorderStyle>[^,]*), ?(?P<Outline>[^,]*), ?(?P<Shadow>[^,]*), ?(?P<Alignment>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Encoding>.*)\n")
 43 |     match = line_pattern.match(line)
 44 |     if match:
 45 |         return {key: match.group(key) for key in line_pattern.groupindex}
 46 |     else:
 47 |         return None
 48 | 
 49 | def dict2line(d):
 50 |     return "{Format}: {Layer},{Start},{End},{Style},{Name},{MarginL},{MarginR},{MarginV},{Effect},{Text}".format(**d)
 51 | 
 52 | def timestamp_to_centiseconds(timestamp):
 53 |     timestamp_split = timestamp.split(":")
 54 |     timestamp_sec_split = timestamp_split[2].split(".")
 55 |     hour = int(timestamp_split[0])
 56 |     minute = int(timestamp_split[1])
 57 |     second = int(timestamp_sec_split[0])
 58 |     centisecond = int(timestamp_sec_split[1])
 59 | 
 60 |     centiseconds = 360000*hour + 6000*minute + 100*second + centisecond
 61 |     return centiseconds
 62 | 
 63 | def centiseconds_to_timestamp(centiseconds):
 64 |     hours, remainder = divmod(centiseconds, 360000)
 65 |     minutes, remainder = divmod(remainder, 6000)
 66 |     seconds, centisecond = divmod(remainder, 100)
 67 | 
 68 |     timestamp = f"{hours:01d}:{minutes:02d}:{seconds:02d}.{centisecond:02d}"
 69 |     return timestamp
 70 | 
 71 | def convert_to_centiseconds(lines_list):
 72 |     for line_idx, line in enumerate(lines_list):
 73 |         if line.startswith("Dialogue: "):
 74 |             line_dict = line2dict(line)
 75 |             line_dict["Start"] = timestamp_to_centiseconds(line_dict["Start"])
 76 |             line_dict["End"] = timestamp_to_centiseconds(line_dict["End"])
 77 |             lines_list[line_idx] = dict2line(line_dict)
 78 | 
 79 | def convert_to_timestamp(lines_list):
 80 |     for line_idx, line in enumerate(lines_list):
 81 |         if line.startswith("Dialogue: "):
 82 |             line_dict = line2dict(line)
 83 |             line_dict["Start"] = centiseconds_to_timestamp(int(line_dict["Start"]))
 84 |             line_dict["End"] = centiseconds_to_timestamp(int(line_dict["End"]))
 85 |             lines_list[line_idx] = dict2line(line_dict)
 86 | 
 87 | def load_styles(lines_list):
 88 |     styles_dict = {}
 89 |     for line in lines_list:
 90 |         if line.startswith("Style: "):
 91 |             line_dict = style2dict(line)
 92 |             styles_dict[line_dict["Name"]] = line_dict
 93 | 
 94 |     return styles_dict
 95 | 
 96 | def widen_box(box, ratio):
 97 |     # Based on screen height
 98 |     ratio = ratio - 1
 99 |     pixels = script_height * ratio
100 |     return (round(box[0] - pixels), round(box[1] - pixels), round(box[2] + pixels), round(box[3] + pixels))
101 | 
102 | def get_text_box_sign(text, font, xy):
103 |     text_box = draw.textbbox(xy, text=text, font=font)
104 |     out_box = widen_box((xy[0], xy[1], text_box[2], text_box[3]), widen_ratio)
105 |     return out_box # (top_left x, top_left y, bottom right x, bottom right y)
106 | 
107 | def get_text_box_sign_an8(text, font, xy):
108 |     text_box = draw.textbbox(xy, text=text, font=font, anchor="ma", align='center')
109 |     out_box = widen_box((text_box[0], xy[1], text_box[2], text_box[3]), widen_ratio)
110 |     return out_box # (top_left x, top_left y, bottom right x, bottom right y)
111 | 
112 | def get_text_box_dialogue(text):
113 |     out_box = draw.textbbox((script_width/2, script_height - margin_v), text=text.replace("\\N", "\n"),
114 |                              font=dialogue_font, anchor="md", align='center', spacing=dialogue_spacing)
115 |     return widen_box(out_box, widen_ratio) # (top_left x, top_left y, bottom right x, bottom right y)
116 | 
117 | def get_text_box_dialogue_top(text, stack_size = 0):
118 |     text = "\n" * stack_size + text
119 |     out_box = draw.textbbox((script_width/2, margin_v), text=text.replace("\\N", "\n"),
120 |                              font=dialogue_font, anchor="ma", align='center', spacing=dialogue_spacing)
121 |     return widen_box(out_box, widen_ratio) # (top_left x, top_left y, bottom right x, bottom right y)
122 | 
123 | def time_overlap(line1, line2):
124 |     return max(int(line1["Start"]), int(line2["Start"])) < min(int(line1["End"]), int(line2["End"]))
125 | 
126 | def pos_overlap(rect1, rect2):
127 |     x1_tl, y1_tl, x1_br, y1_br = rect1
128 |     x2_tl, y2_tl, x2_br, y2_br = rect2
129 | 
130 |     if x1_tl > x2_br or x2_tl > x1_br:
131 |         return False
132 | 
133 |     if y1_tl > y2_br or y2_tl > y1_br:
134 |         return False
135 | 
136 |     return True
137 | 
138 | def get_pos(text):
139 |     pattern = re.compile(r'\\pos\((?P<xpos>[\d.]+),(?P<ypos>[\d.]+)\)')
140 | 
141 |     match = pattern.search(text)
142 | 
143 |     if match:
144 |         xpos = float(match.group('xpos'))
145 |         ypos = float(match.group('ypos'))
146 |         return (xpos, ypos)
147 |     else:
148 |         return None
149 | 
150 | def make_caption_areas(lines_list, styles_dict):
151 |     caption_list = []
152 | 
153 |     for line_idx, line in enumerate(lines_list):
154 |         if not line.startswith("Dialogue: "):
155 |             continue
156 | 
157 |         line_dict = line2dict(line)
158 | 
159 |         if "Caption" in line_dict["Style"]:
160 |             pos = get_pos(line_dict["Text"])
161 |             if not pos:
162 |                 # print(f"caption without pos: {line}")
163 |                 continue
164 |             style = styles_dict[line_dict["Style"]]
165 |             fontsize = int(style["Fontsize"])
166 |             font = ImageFont.load_default(fontsize)
167 |             text = line_dict["Text"].split("}")[-1].strip("\n")
168 |             if "{\\an7}" in line_dict["Text"]:
169 |                 line_dict["Area"] = get_text_box_sign(text, font, pos)
170 |             else:
171 |                 line_dict["Area"] = get_text_box_sign_an8(text, font, pos)
172 |             # print(line_dict)
173 |             caption_list.append(line_dict)
174 | 
175 |     return caption_list
176 | 
177 | def check_an8_spot(line_dict, an8_events, captions_list):
178 |     # Do not go to an8 if there will be an overlap with a predefined an8 line
179 |     for event in an8_events:
180 |         if time_overlap(event, line_dict):
181 |             return False
182 | 
183 |     # Check if overlaps with a sign when sent to an8
184 |     # Doesn't check if stacking an8 lines overlap currently
185 |     for caption_line in captions_list:
186 |         if not time_overlap(line_dict, caption_line):
187 |             continue
188 | 
189 |         text = line_dict["Text"].split("}")[-1].strip("\n")
190 |         dialogue_box = get_text_box_dialogue_top(text)
191 | 
192 |         if pos_overlap(caption_line["Area"], dialogue_box):
193 |             return False
194 | 
195 |     return True
196 | 
197 | def get_an8_events(lines_list, styles_dict):
198 |     an8_events = []
199 | 
200 |     for line in lines_list:
201 |         if line.startswith("Dialogue: "):
202 |             line_dict = line2dict(line)
203 |             if styles_dict[line_dict["Style"]]["Alignment"] == "8":
204 |                 an8_events.append(line_dict)
205 | 
206 |     return an8_events
207 | 
208 | def send_stacked_lines_to_top(lines_list, styles_dict, bottom_line, an8_events, captions_list):
209 |     for line_idx, line in enumerate(lines_list):
210 |         if not line.startswith("Dialogue: "):
211 |             continue
212 | 
213 |         line_dict = line2dict(line)
214 | 
215 |         if "Subtitle" in line_dict["Style"] and styles_dict[line_dict["Style"]]["Alignment"] == "2":
216 |             if time_overlap(line_dict, bottom_line):
217 |                 if not check_an8_spot(line_dict, an8_events, captions_list):
218 |                     continue
219 |                 if "\\an8" not in line_dict["Text"]:
220 |                     line_dict["Text"] = "{\\an8}" + line_dict["Text"]
221 |                     lines_list[line_idx] = dict2line(line_dict)
222 | 
223 | 
224 | def check_dialogue_overlap(lines_list, captions_list, styles_dict):
225 |     an8_events = get_an8_events(lines_list, styles_dict)
226 | 
227 |     for line_idx, line in enumerate(lines_list):
228 |         if not line.startswith("Dialogue: "):
229 |             continue
230 | 
231 |         line_dict = line2dict(line)
232 | 
233 |         if "Subtitle" in line_dict["Style"] and styles_dict[line_dict["Style"]]["Alignment"] == "2":
234 |             for caption_line in captions_list:
235 |                 if not time_overlap(line_dict, caption_line):
236 |                     continue
237 | 
238 |                 text = line_dict["Text"].split("}")[-1].strip("\n")
239 |                 dialogue_box = get_text_box_dialogue(text)
240 | 
241 |                 if pos_overlap(caption_line["Area"], dialogue_box):
242 |                     if not check_an8_spot(line_dict, an8_events, captions_list):
243 |                         break
244 |                     # Send stacked lines to an8
245 |                     send_stacked_lines_to_top(lines_list, styles_dict, line_dict, an8_events, captions_list)
246 |                     if "\\an8" not in line_dict["Text"]:
247 |                         line_dict["Text"] = "{\\an8}" + line_dict["Text"]
248 |                         lines_list[line_idx] = dict2line(line_dict)
249 |                     break
250 | 
251 | def fix_line_switching(lines_list, captions_list, styles_dict):
252 |     # Time based sending of lines to an8 if they are sandwiched between two an8 events that are within a timeframe
253 |     an8_events = get_an8_events(lines_list, styles_dict)
254 | 
255 |     for line_idx, line in enumerate(lines_list):
256 |         # First an8 line
257 |         if not line.startswith("Dialogue: "):
258 |             continue
259 |         cur_line_dict = line2dict(lines_list[line_idx])
260 |         if "\\an8" not in cur_line_dict["Text"]:
261 |             continue
262 |         cur_end_time = int(cur_line_dict["End"])
263 | 
264 |         # Look for next an8 within threshold
265 |         lookahead = 1
266 |         while True:
267 |             try:
268 |                 next_line_dict = line2dict(lines_list[line_idx + lookahead])
269 |                 next_start_time = int(next_line_dict["Start"])
270 |                 if (next_start_time - cur_end_time) > line_switching_threshold:
271 |                     break
272 |                 if "{\\an8}" not in next_line_dict["Text"]:
273 |                     lookahead += 1
274 |                     continue
275 |                 for i in range(line_idx + 1, line_idx + lookahead):
276 |                     # Set in between lines to an8
277 |                     line_dict = line2dict(lines_list[i])
278 |                     if check_an8_spot(line_dict, an8_events, captions_list) and "\\an8" not in line_dict["Text"]:
279 |                         line_dict["Text"] = "{\\an8}" + line_dict["Text"]
280 |                         lines_list[i] = dict2line(line_dict)
281 |                 break
282 |             except Exception:
283 |                 break
284 | 
285 |     # Inverse ducking sandwich. Insane edge case feel free to remove.
286 | 
287 |     counter = 0
288 | 
289 |     for line_idx, line in enumerate(lines_list):
290 |         if not line.startswith("Dialogue: "):
291 |             continue
292 | 
293 |         if counter < 2:
294 |             counter += 1
295 |             continue
296 | 
297 |         line_a = line2dict(lines_list[line_idx - 2])
298 |         line_b = line2dict(lines_list[line_idx - 1])
299 |         line_c = line2dict(lines_list[line_idx])
300 | 
301 |         if ("\\an8" not in line_a["Text"]) and ("\\an8" in line_b["Text"]) and ("\\an8" not in line_c["Text"]):
302 |             for caption_line in captions_list:
303 |                 if not time_overlap(line_b, caption_line):
304 |                     continue
305 | 
306 |                 text_a = line_a["Text"].split("}")[-1].strip("\n")
307 |                 text_c = line_c["Text"].split("}")[-1].strip("\n")
308 | 
309 |                 # If it would have not gone to an8 with the text box of the line before it, send it back to an2
310 |                 dialogue_box_a = get_text_box_dialogue_top(text_a)
311 |                 dialogue_box_c = get_text_box_dialogue_top(text_c)
312 | 
313 |                 if pos_overlap(caption_line["Area"], dialogue_box_a) or pos_overlap(caption_line["Area"], dialogue_box_c):
314 |                     line_b["Text"] = line_b["Text"].replace("{\\an8}", "")
315 |                     lines_list[line_idx - 1] = dict2line(line_b)
316 |                     break
317 | 
318 | def main(inpath, outpath):
319 | 
320 |     with open(inpath, encoding="utf-8") as infile:
321 |         lines_list = infile.readlines()
322 | 
323 |     convert_to_centiseconds(lines_list)
324 | 
325 |     styles_dict = load_styles(lines_list)
326 |     captions_list = make_caption_areas(lines_list, styles_dict)
327 |     check_dialogue_overlap(lines_list, captions_list, styles_dict)
328 |     fix_line_switching(lines_list, captions_list, styles_dict)
329 | 
330 |     convert_to_timestamp(lines_list)
331 | 
332 |     with open(outpath, "w", encoding="utf-8") as outfile:
333 |         outfile.writelines(lines_list)
334 | 
335 | if __name__ == "__main__":
336 |     if len(sys.argv) != 3:
337 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
338 | 
339 |     main(sys.argv[1], sys.argv[2])
340 | 


--------------------------------------------------------------------------------
/Style_Cleanup/Style_Cleanup.py:
--------------------------------------------------------------------------------
  1 | # Style_Cleanup V1.5
  2 | 
  3 | import sys
  4 | import re
  5 | 
  6 | DEFAULT_STYLE = "SPOverrideF,50,&H00FFFFFF,&H00FFFFFF,&H00000000,&HA0000000,-1,0,0,0,100,100,0,0,1,2.4,1,2,40,40,40,1"
  7 | ALT_STYLE = "SPOverrideF,50,&H00FFFFFF,&H00FFFFFF,&H00743E15,&HA0000000,-1,0,0,0,100,100,0,0,1,2.4,1,2,40,40,40,1"
  8 | OVERLAP_TAG = "{\\3c&H743E15&}"
  9 | 
 10 | def event_to_dict(line):
 11 |     line_pattern = re.compile(r"(?P<Format>[^:]*): ?(?P<Layer>\d*), ?(?P<Start>[^,]*), ?(?P<End>[^,]*), ?(?P<Style>[^,]*), ?(?P<Name>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Effect>[^,]*),(?P<Text>.*)")
 12 |     match = line_pattern.match(line)
 13 |     return {key: match.group(key) for key in line_pattern.groupindex}
 14 | 
 15 | 
 16 | def dict_to_event(d):
 17 |     return f"{d['Format']}: {d['Layer']},{d['Start']},{d['End']},{d['Style']},{d['Name']},{d['MarginL']},{d['MarginR']},{d['MarginV']},{d['Effect']},{d['Text']}"
 18 | 
 19 | 
 20 | def style_to_dict(line):
 21 |     line_pattern = re.compile(
 22 |         r"(?P<Format>[^:]*): ?(?P<Name>[^,]*), ?(?P<Fontname>[^,]*), ?(?P<Fontsize>[^,]*), ?(?P<PrimaryColour>[^,]*), ?(?P<SecondaryColour>[^,]*), ?(?P<OutlineColour>[^,]*), ?(?P<BackColour>[^,]*), ?(?P<Bold>[^,]*), ?(?P<Italic>[^,]*), ?(?P<Underline>[^,]*), ?(?P<StrikeOut>[^,]*), ?(?P<ScaleX>[^,]*), ?(?P<ScaleY>[^,]*), ?(?P<Spacing>[^,]*), ?(?P<Angle>[^,]*), ?(?P<BorderStyle>[^,]*), ?(?P<Outline>[^,]*), ?(?P<Shadow>[^,]*), ?(?P<Alignment>[^,]*), ?(?P<MarginL>[^,]*), ?(?P<MarginR>[^,]*), ?(?P<MarginV>[^,]*), ?(?P<Encoding>.*)"
 23 |     )
 24 |     match = line_pattern.match(line)
 25 |     return {key: match.group(key) for key in line_pattern.groupindex}
 26 | 
 27 | 
 28 | def dict_to_style(d):
 29 |     return f"Style: {d['Name']},{d['Fontname']},{d['Fontsize']},{d['PrimaryColour']},{d['SecondaryColour']},{d['OutlineColour']},{d['BackColour']},{d['Bold']},{d['Italic']},{d['Underline']},{d['StrikeOut']},{d['ScaleX']},{d['ScaleY']},{d['Spacing']},{d['Angle']},{d['BorderStyle']},{d['Outline']},{d['Shadow']},{d['Alignment']},{d['MarginL']},{d['MarginR']},{d['MarginV']},{d['Encoding']}"
 30 | 
 31 | 
 32 | def generate_styles_list(lines_list):
 33 |     styles_list = []
 34 |     for line in lines_list:
 35 |         if line.startswith("Style: "):
 36 |             styles_list.append(line.strip("\n"))
 37 | 
 38 |     return styles_list
 39 | 
 40 | 
 41 | def generate_events_list(lines_list):
 42 |     events_list = []
 43 |     for line in lines_list:
 44 |         if line.startswith("Dialogue: "):
 45 |             events_list.append(line.strip("\n"))
 46 | 
 47 |     return events_list
 48 | 
 49 | 
 50 | def compare_dictionaries(dict1, dict2, exclude_keys=[]):
 51 |     filtered_dict1 = {k: v for k, v in dict1.items() if k not in exclude_keys}
 52 |     filtered_dict2 = {k: v for k, v in dict2.items() if k not in exclude_keys}
 53 | 
 54 |     return filtered_dict1 == filtered_dict2
 55 | 
 56 | 
 57 | def map_styles_with_inline_tags(styles):
 58 | 
 59 |     style_inline = {}
 60 | 
 61 |     for style in styles:
 62 |         style_dict = style_to_dict(style)
 63 | 
 64 |         if "Subtitle" in style_dict["Name"]:
 65 |             default_style_dict = style_to_dict(f"Style: Default,{DEFAULT_STYLE}")
 66 |             style_inline[style_dict["Name"]] = ""
 67 | 
 68 |             if str(style_dict["Alignment"]) == "8":
 69 |                 style_inline[style_dict["Name"]] += r"\an8"
 70 |             if str(style_dict["Italic"]) == "-1":
 71 |                 style_inline[style_dict["Name"]] += r"\i1"
 72 |             if str(style_dict["Underline"]) == "-1":
 73 |                 style_inline[style_dict["Name"]] += r"\u1"
 74 |             if int(style_dict["Fontsize"]) != int(default_style_dict["Fontsize"]) and str(style_dict["Fontname"]) == str(default_style_dict["Fontname"]):
 75 |                 style_inline[style_dict["Name"]] += r"\fs" + style_dict["Fontsize"]
 76 | 
 77 |         elif "Song" in style_dict["Name"] and "SongCap" not in style_dict["Name"]:
 78 |             style_inline[style_dict["Name"]] = ""
 79 | 
 80 |         elif "Cap" in style_dict["Name"]:
 81 |             style_inline[style_dict["Name"]] = f"\\fn{style_dict['Fontname']}\\fs{style_dict['Fontsize']}\\c&H{style_dict['PrimaryColour'][-6:]}&"
 82 |             if str(style_dict["Italic"]) == "-1":
 83 |                 style_inline[style_dict["Name"]] += r"\i1"
 84 |             if str(style_dict["Underline"]) == "-1":
 85 |                 style_inline[style_dict["Name"]] += r"\u1"
 86 | 
 87 |     return style_inline
 88 | 
 89 | 
 90 | def update_styles_and_inline_tags(styles, events, style_inline):
 91 |     default_style_dict = style_to_dict(f"Style: Default,{DEFAULT_STYLE}")
 92 | 
 93 |     has_alt = False
 94 |     for event in events:
 95 |         if "{\\3c&H743E15&}" in event:
 96 |             has_alt = True
 97 |             break
 98 | 
 99 |     fallback_style_count = 0
100 |     subtitle_other_font_dict = {}
101 | 
102 |     for index, style in enumerate(styles):
103 |         style_dict = style_to_dict(style)
104 | 
105 |         if "Subtitle" in style_dict["Name"]:
106 |             style_dict["Alignment"] = "2"
107 |             style_dict["Italic"] = "0"
108 |             style_dict["Underline"] = "0"
109 |             style_dict["MarginL"] = default_style_dict["MarginL"]
110 |             style_dict["MarginR"] = default_style_dict["MarginR"]
111 |             style_dict["MarginV"] = default_style_dict["MarginV"]
112 |             if int(style_dict["Fontsize"]) != int(default_style_dict["Fontsize"]) and str(style_dict["Fontname"]) == str(default_style_dict["Fontname"]):
113 |                 style_dict["Fontsize"] = default_style_dict["Fontsize"]
114 |             temp_style_name = style_dict["Name"]
115 |             style_dict["Name"] = "Subtitle"
116 | 
117 |             if style_dict["Fontname"] != default_style_dict["Fontname"]:
118 |                 style_dict["Name"] = style_dict["Fontname"]
119 |                 try:
120 |                     break_flag = False
121 |                     for i, stored_style in enumerate(subtitle_other_font_dict[style_dict["Fontname"]]):
122 |                         if compare_dictionaries(style_dict, stored_style, exclude_keys=["Name"]):
123 |                             f_count = i + 1
124 |                             break_flag = True
125 |                             break
126 |                     if not break_flag:
127 |                         subtitle_other_font_dict[style_dict["Fontname"]].append(style_dict)
128 |                         f_count = len(subtitle_other_font_dict[style_dict["Fontname"]])
129 |                 except Exception as e:
130 |                     subtitle_other_font_dict[style_dict["Fontname"]] = [style_dict]
131 |                     f_count = 1
132 |                 style_dict["Name"] = f"Subtitle-{style_dict['Fontname']}-{f_count}"
133 |             elif not compare_dictionaries(style_dict, default_style_dict, exclude_keys=
134 |                                     ["Name", "Fontsize", "Alignment", "Italic", "Underline", "MarginL", "MarginR", "MarginV"]):
135 |                 fallback_style_count += 1
136 |                 style_dict["Name"] = "Subtitle-" + str(fallback_style_count)
137 | 
138 |             styles[index] = dict_to_style(style_dict)
139 | 
140 |             for i, event in enumerate(events):
141 |                 event_dict = event_to_dict(event)
142 |                 if event_dict["Style"] == temp_style_name:
143 |                     if OVERLAP_TAG in event_dict["Text"]:
144 |                         event_dict["Style"] = "Subtitle-Alt"
145 |                         event_dict["Text"] = event_dict["Text"].replace(OVERLAP_TAG, "")
146 |                     else:
147 |                         event_dict["Style"] = style_dict["Name"]
148 |                     if style_inline[temp_style_name]:
149 |                         event_dict["Text"] = f"{{{style_inline[temp_style_name]}}}{event_dict['Text']}"  # prepend text with inline tags
150 |                     events[i] = dict_to_event(event_dict)
151 | 
152 |         elif "Song" in style_dict["Name"] and "SongCap" not in style_dict["Name"]:
153 |             style_dict["Name"] = "Song"
154 |             styles[index] = dict_to_style(style_dict)
155 | 
156 |             for i, event in enumerate(events):
157 |                 event_dict = event_to_dict(event)
158 |                 if "Song" in event_dict["Style"]:
159 |                     event_dict["Style"] = "Song"
160 |                     events[i] = dict_to_event(event_dict)
161 | 
162 |         elif "Cap" in style_dict["Name"]:
163 |             temp_style_name = style_dict["Name"]
164 |             if "Caption" in style_dict["Name"]:
165 |                 style_dict["Name"] = "Caption"
166 |             elif "SongCap" in style_dict["Name"]:
167 |                 style_dict["Name"] = "SongCap"
168 |             style_dict["Fontname"] = default_style_dict["Fontname"]
169 |             style_dict["Fontsize"] = default_style_dict["Fontsize"]
170 |             style_dict["PrimaryColour"] = default_style_dict["PrimaryColour"]
171 |             style_dict["SecondaryColour"] = default_style_dict["SecondaryColour"]
172 |             style_dict["Alignment"] = "7"
173 |             style_dict["Italic"] = "0"
174 |             style_dict["Underline"] = "0"
175 | 
176 |             styles[index] = dict_to_style(style_dict)
177 | 
178 |             for i, event in enumerate(events):
179 |                 event_dict = event_to_dict(event)
180 |                 if event_dict["Style"] == temp_style_name:
181 |                     event_dict["Style"] = style_dict["Name"]
182 |                     if style_inline[temp_style_name]:
183 |                         if event_dict["Text"].count("{\\an7}") > 0:
184 |                             event_dict["Text"] = event_dict["Text"].replace("{\\an7}", "")
185 |                         else:
186 |                             event_dict["Text"] = "{\\an8}" + event_dict["Text"]
187 |                         event_dict["Text"] = f"{{{style_inline[temp_style_name]}}}{event_dict['Text']}"  # prepend text with inline tags
188 |                         event_dict["Text"] = event_dict["Text"].replace("}{", "")
189 |                     events[i] = dict_to_event(event_dict)
190 | 
191 |     if has_alt:
192 |         styles.append(f"Style: Subtitle-Alt,{ALT_STYLE}")
193 | 
194 |     new_styles = []
195 |     for i in styles:
196 |         if i not in new_styles and not i.startswith("Style: Default"):
197 |             new_styles.append(i)
198 |     styles.clear()
199 |     styles.extend(new_styles)
200 | 
201 | 
202 | def sort_styles(styles):
203 |     def custom_sort_key(style):
204 |         if "Subtitle" in style and "Alt" not in style:
205 |             return (0, style)
206 |         if "Alt" in style:
207 |             return (1, style)
208 |         if "Song" in style and "SongCap" not in style:
209 |             return (2, style)
210 |         if "Cap" in style:
211 |             return (3, style)
212 |         return (4, style)
213 | 
214 |     styles.sort(key=custom_sort_key)
215 | 
216 | 
217 | def update_lines_list(lines_list, styles, events):
218 |     new_styles = [style + "\n" for style in styles]
219 |     new_events = [event + "\n" for event in events]
220 |     out_list = [line for line in lines_list if not line.startswith("Style: ")]
221 |     out_list = [line for line in out_list if not line.startswith("Dialogue: ")]
222 | 
223 |     for idx, line in enumerate(out_list):
224 |         if line.startswith("Format: Name, Fontname, Fontsize"):
225 |             index_of_styles = idx + 1
226 |             break
227 |     out_list = out_list[:index_of_styles] + new_styles + out_list[index_of_styles:]
228 | 
229 |     for idx, line in enumerate(out_list):
230 |         if line.startswith("Format: Layer, Start, End"):
231 |             index_of_events = idx + 1
232 |             break
233 |     out_list = out_list[:index_of_events] + new_events + out_list[index_of_events:]
234 | 
235 |     return out_list
236 | 
237 | 
238 | def main(inpath, outpath):
239 |     with open(inpath, encoding="utf-8") as infile:
240 |         lines_list = infile.readlines()
241 | 
242 |     styles = generate_styles_list(lines_list)
243 |     events = generate_events_list(lines_list)
244 |     style_inline = map_styles_with_inline_tags(styles)
245 |     update_styles_and_inline_tags(styles, events, style_inline)
246 |     sort_styles(styles)
247 |     out_list = update_lines_list(lines_list, styles, events)
248 | 
249 |     with open(outpath, "w", encoding="utf-8") as outfile:
250 |         outfile.writelines(out_list)
251 | 
252 | 
253 | if __name__ == "__main__":
254 |     if len(sys.argv) != 3:
255 |         sys.exit(f"Usage: {sys.argv[0]} infile.ass outfile.ass")
256 | 
257 |     main(sys.argv[1], sys.argv[2])


--------------------------------------------------------------------------------