├── .gitignore
├── LICENSE
├── README.md
├── deprecated
│   ├── extract_subtitles_old.py
│   └── ocr.py
├── extract_subtitles.py
├── gui_crop_select.py
├── libs
│   ├── __package__.py
│   ├── cv_utils.py
│   ├── fun_utils.py
│   └── lrc.py
├── requirements.txt
├── timeline_ops.py
└── vcat_subtitle_imgs.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # pyCharm
107 | .idea/
108 | 
109 | # examples
110 | image_examples/
111 | video_examples/
112 | 
113 | frames/
114 | *.mp4
115 | *.mp3
116 | *.mkv
117 | *.lrc
118 | *.srt
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Xiao Tian
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Subtitles Extraction
2 | 
3 | Key-frame extraction is adapted from [Amanpreet Walia](https://github.com/amanwalia92)'s work.
4 | 
5 | This project extracts subtitles from videos. First, key frames are extracted from the video; then the subtitle area of each frame is cropped, and the text is recognized by OCR.
6 | 
7 | ## Getting Started
8 | 
9 | ### Install the following dependencies
10 | 
11 | - __OpenCV-Python__ (used for basic video processing, e.g. read-frame-stream, crop, frame-diff, processing-gui)
12 | - __PyTesseract__ (only its `image_to_string(img, lang)` is used)
13 | - NumPy (`smooth` filter) (find it [here](http://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy))
14 | - SciPy (`signal.argrelextrema`)
15 | - StrsimPy (`NormalizedLevenshtein` string similarity)
16 | - Matplotlib (draws the frame-differences stem plot)
17 | - ProgressBar
18 | 
19 | Install missing dependencies first using `pip install -r requirements.txt`
20 | 
21 | ### Install Tesseract OCR
22 | 
23 | [Download](https://github.com/UB-Mannheim/tesseract/wiki) it and (try to) run it; select language support during install if you want (check with `tesseract --list-langs`).
24 | 
25 | ### Run
26 | 
27 | ```
28 | λ python extract_subtitles.py
29 | 
30 | ./extract_subtitles.py -crop '0(907,940)[101,77]' -lang eng a.flv ; ./timeline_ops.py merge frames/a.flv/timeline.txt 0|./timeline_ops.py to-lrc 1 srt >a.srt
31 | # for srt/Audacity ops, refer to my GH:mkey/yy/atag2srt
32 | # and GH:tv/arecog.sh , GH:MontagePy/_1c
33 | # export OPENCV_OPENCL_DEVICE='Clover' # if VideoReader fails
34 | 
35 | ./extract_subtitles.py -crop '0(883,484)[134,120]' -lang index --chunk-size 600 --crop-debug a.flv
36 | ```
37 | 
38 | `./montage1_c.py -font /usr/share/fonts/wenquanyi/*/wqy-zenhei.ttc a.mp4 --subtitle b.srt -font-size 25 --mon-background '#6e6c39' --subtitle-placeholder 好 -spacing :5,3 -key-color '#000000' --key-thres 3`
39 | 
40 | ## License
41 | 
42 | This project is licensed under the MIT License - see [LICENSE](LICENSE) for details.
43 | 
--------------------------------------------------------------------------------
/deprecated/extract_subtitles_old.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import Tuple
5 | 
6 | from argparse import ArgumentParser, FileType
7 | from re import findall
8 | from pathlib import Path
9 | from os import remove
10 | from progressbar import ProgressBar
11 | 
12 | import cv2
13 | from cv2 import CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT
14 | from pytesseract import image_to_string
15 | 
16 | import numpy as np
17 | from scipy.signal import argrelextrema
18 | 
19 | import matplotlib.pyplot as plot
20 | 
21 | 
22 | class PatternType:
23 |   def __init__(self, regex, transform = lambda x: x):
24 |     self.regex, self.transform = regex, transform
25 |   def __call__(self, string):
26 |     return [self.transform(group) for group in findall(self.regex, string)]
27 |   def __repr__(self): return f"PatternType({self.regex})"
28 | 
29 | def toMapper(transform):
30 |   return lambda xs: 
[transform(x) for x in xs] 31 | 32 | def zipWithNext(xs): 33 | assert len(xs) >= 2, f"{len(xs)} too short (< 2)" 34 | for i in range(1, len(xs)): 35 | yield (xs[i-1], xs[i]) 36 | 37 | def require(p, message): 38 | if not p: raise(ValueError(message)) 39 | 40 | def snakeSplit(text): return text.strip().split("_") 41 | def titleCased(texts, sep=" "): return sep.join(map(str.capitalize, texts)) 42 | 43 | def printAttributes(fmt=lambda k, v: f"[{titleCased(snakeSplit(k))}] {v}", sep="\n", **kwargs): 44 | text = sep.join([fmt(k, v) for (k, v) in kwargs.items()]) 45 | print(text) 46 | 47 | def printResult(op): 48 | def _invoke(*args, **kwargs): 49 | res = op(*args, **kwargs) 50 | print(res); return res 51 | return _invoke 52 | 53 | 54 | 55 | def smooth(x, len_window, window = "hanning") -> np.array: 56 | supported_windows = ["flat", "hanning", "hamming", "bartlett", "blackman"] 57 | print("smooth", len(x), len_window) 58 | if len_window < 3: return x 59 | require(x.ndim == 1, "smooth only accepts 1 dimension arrays") 60 | require(x.size >= len_window, "input vector must >= window size") 61 | 62 | require(window in supported_windows, f"window must of {supported_windows}") 63 | 64 | s = np.r_[2 * x[0] - x[len_window:1:-1], 65 | x, 2 * x[-1] - x[-1:-len_window:-1]] 66 | w = getattr(np, window)(len_window) if window != "flat" else np.ones(len_window, "d") 67 | y = np.convolve(w / w.sum(), s, mode="same") 68 | return y[len_window -1 : -len_window +1] 69 | 70 | def cv2NormalWin(title): 71 | cv2.namedWindow(title, cv2.WINDOW_NORMAL) 72 | 73 | def cv2WaitKey(key_code, delay_ms = 1) -> bool: 74 | require(len(key_code) == 1, f"{repr(key_code)} must be single char") 75 | return cv2.waitKey(delay_ms) & 0xFF == ord(key_code) 76 | 77 | def cv2VideoProps(cap: cv2.VideoCapture, props = (CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT)) -> Tuple[int, ...]: 78 | ''' (count, fps, width, height) ''' 79 | return tuple(map(int, map(cap.get, props))) 80 | 81 | class Frame: 82 | ''' Class to hold information about each frame ''' 83 | def __init__(self, no, img, value): 84 | self.no, self.img, self.value = no, img, value 85 | 86 | def __lt__(self, other): return self.no < other.no 87 | def __eq__(self, other): return self.no == other.no 88 | 89 | global app_cfg 90 | 91 | def inFramesDir(name) -> str: return str(app_cfg.frames_dir/name) 92 | filename_frame = lambda it: f"frame_{it.no}.jpg" 93 | 94 | WIN_SUBTITLE_RECT = "Subtitle Rect" 95 | WIN_LAST_FRAME = "Last Frame" 96 | 97 | def postprocessArgs(): 98 | if app_cfg.crop != None: 99 | ((x,y), (w,h)) = app_cfg.crop 100 | app_cfg.crop_cfg = ((y, y+h), (x, x+w)) 101 | 102 | def recognizeText(name) -> str: 103 | img = cv2.imread(inFramesDir(name)) 104 | if app_cfg.crop != None: 105 | ((y, y_end), (x, x_end)) = app_cfg.crop_cfg 106 | croped_img = img[y:y_end, x:x_end] 107 | if app_cfg.crop_debug: 108 | cv2.imshow(WIN_SUBTITLE_RECT, croped_img) 109 | cv2.waitKey(1) 110 | return image_to_string(croped_img, app_cfg.lang) 111 | else: 112 | return image_to_string(img, app_cfg.lang) 113 | 114 | @printResult 115 | def relativeChange(a: float, b: float) -> float: return (b - a) / max(a, b) 116 | 117 | 118 | def solveFrameDifferences(cap: cv2.VideoCapture, on_frame = lambda x: ()) -> Tuple[list, list]: 119 | frames, frame_diffs = [], [] 120 | 121 | index = 0 122 | prev_frame, curr_frame = None, None 123 | 124 | unfinished, img = cap.read() 125 | prev_frame = img #< initial (prev == curr) 126 | cv2NormalWin(WIN_LAST_FRAME) 127 | (n_frame, _, _, _) = 
cv2VideoProps(cap)
128 |   progress = ProgressBar(maxval=n_frame).start()
129 |   while unfinished:
130 |     curr_frame = cv2.cvtColor(img, cv2.COLOR_BGR2LUV) #luv
131 |     if curr_frame is not None: # and prev_frame is not None
132 |       diff = cv2.absdiff(curr_frame, prev_frame) #< main logic goes here
133 |       count = np.sum(diff)
134 |       frame_diffs.append(count)
135 |       frame = Frame(index, img, count)
136 |       #frames.append(frame)
137 |       on_frame(curr_frame)
138 |     prev_frame = curr_frame
139 |     index = index + 1
140 |     progress.update(index)
141 |     unfinished, img = cap.read()
142 |     if app_cfg.crop_debug:
143 |       cv2.imshow(WIN_LAST_FRAME, prev_frame) #< must have single name, to animate
144 |       if cv2WaitKey('q'): break
145 |   progress.finish()
146 |   return (frames, frame_diffs)
147 | 
148 | def sortTopFramesDsc(frames, n_top_frames):
149 |   ''' sort the list in descending order '''
150 |   frames.sort(key=lambda it: it.value, reverse=True)
151 |   for keyframe in frames[:n_top_frames]:
152 |     name = filename_frame(keyframe)
153 |     cv2.imwrite(inFramesDir(name), keyframe.img)
154 | 
155 | def writeFramesThreshold(frames):
156 |   for (a, b) in zipWithNext(frames):
157 |     if relativeChange(float(a.value), float(b.value)) < app_cfg.thres: continue
158 |     #print("prev_frame:"+str(frames[i-1].value)+" curr_frame:"+str(frames[i].value))
159 |     name = filename_frame(a)
160 |     cv2.imwrite(inFramesDir(name), a.img)
161 | 
162 | def ocrWithLocalMaxima(frames, frame_diffs, on_new_subtitle = print) -> np.array:
163 |   diff_array = np.array(frame_diffs)
164 |   sm_diff_array = smooth(diff_array, app_cfg.window_size)
165 |   frame_indices = np.subtract(np.asarray(argrelextrema(sm_diff_array, np.greater))[0], 1)
166 |   last_subtitle = ""
167 |   for frame in map(frames.__getitem__, frame_indices):
168 |     name = filename_frame(frame)
169 |     cv2.imwrite(inFramesDir(name), frame.img)
170 |     subtitle = recognizeText(name)
171 |     if subtitle != last_subtitle:
172 |       last_subtitle = subtitle #< Check for repeated subtitles
173 |       on_new_subtitle(frame.no, subtitle)
174 |     remove(inFramesDir(name)) #< Delete recognized frame images
175 |   return sm_diff_array
176 | 
177 | def drawPlot(diff_array):
178 |   plot.figure(figsize=(40, 20))
179 |   plot.locator_params(numticks=100)
180 |   plot.stem(diff_array, use_line_collection=True)
181 |   plot.savefig(inFramesDir("plot.png"))
182 | 
183 | app = ArgumentParser(prog="extract_subtitles", description="extract subtitles using OCR with frame difference algorithm")
184 | apg = app.add_argument_group("basic workflow")
185 | apg.add_argument("video", type=FileType("r"), help="source file to extract from")
186 | apg.add_argument("-sort-top-dsc", metavar="n", nargs="?", type=int, help="make top frames and data descending")
187 | apg.add_argument("-thres", metavar="x.x", nargs="?", type=float, help="fixed threshold value (float)")
188 | apg.add_argument("-no-local-maxima", action="store_true", help="don't apply local maxima criteria")
189 | 
190 | '''
191 | Using crop mode (cropping out the subtitle area) can greatly improve recognition accuracy,
192 | but you need to manually adjust the crop area by modifying the values of the cropper parameters (x, y, w, h).
193 | To debug the appropriate values, set -crop-debug to show the cropped result.
194 | '''
195 | 
196 | apg1 = app.add_argument_group("misc settings")
197 | regex_tuple = PatternType(r"\((\d+),(\d+)\)", toMapper(int))
198 | apg1.add_argument("-crop", metavar="(x,y)(w,h)", type=regex_tuple, default=None, help="crop out subtitles area, improve recognition accuracy")
199 | apg1.add_argument("--crop-debug", action="store_true", help="show cropped result if available")
200 | 
201 | apg1.add_argument("-lang", type=str, default="chi_sim", help="OCR language for tesseract engine (tesseract --list-langs)")
202 | apg1.add_argument("-draw-plot", action="store_true", help="draw plot for statistics")
203 | apg1.add_argument("-frames-dir", type=Path, default=Path("frames/"), help="directory to store the processed frames")
204 | apg1.add_argument("-window-size", type=int, default=13, help="smoothing window size")
205 | 
206 | if __name__ == "__main__":
207 |   app_cfg = app.parse_args()
208 |   cfg = app_cfg
209 |   postprocessArgs()
210 |   printAttributes(
211 |     video_path=cfg.video.name,
212 |     frame_directory=cfg.frames_dir,
213 |     subtitle_language=cfg.lang,
214 |     crop=cfg.crop
215 |   )
216 |   print("Extracting key frames...")
217 | 
218 |   capture = cv2.VideoCapture(cfg.video.name)
219 |   cap_props = cv2VideoProps(capture)
220 |   printAttributes(video_props=cap_props)
221 |   (frames, frame_diffs) = solveFrameDifferences(capture)
222 |   capture.release()
223 |   cv2.destroyAllWindows()
224 |   if cfg.sort_top_dsc != None: sortTopFramesDsc(frames, cfg.sort_top_dsc)
225 |   if cfg.thres != None: writeFramesThreshold(frames)
226 |   diff_array = ocrWithLocalMaxima(frames, frame_diffs) if not cfg.no_local_maxima else None
227 |   if cfg.draw_plot: drawPlot(diff_array)
--------------------------------------------------------------------------------
/deprecated/ocr.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import cv2
4 | import operator
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import sys
8 | from PIL import Image
9 | import pytesseract
10 | import PIL.ImageOps
11 | 
12 | #set cropper parameters
13 | # 我不是药神 100 410 600 40
14 | # minecraft 80,285,550,38
15 | # 狗十三 150 430 600 40
16 | #left_padding
17 | x=150
18 | #top_padding
19 | y=430
20 | #window_width
21 | w=600
22 | #window_height
23 | h=40
24 | 
25 | 
26 | imagePath = sys.argv[1]
27 | 
28 | # src=cv2.imread(imagePath)
29 | # cv2.imshow("Before", src)
30 | # cv2.cvtColor(src,cv2.COLOR_BGR2GRAY)
31 | 
32 | im=Image.open(imagePath)
33 | # image=np.asarray(im)
34 | # height,width=image.size
35 | # print(height)
36 | # image2=image.copy()
37 | # for i in range(height):
38 | #   for j in range(width):
39 | #     image2[i,j]=(255-image[i,j])
40 | 
41 | # image2.show()
42 | 
43 | #contrast
44 | #contrasted_im=PIL.ImageOps.autocontrast(im, cutoff=0)
45 | #contrasted_im.show()
46 | 
47 | #gray
48 | #grayed_im=PIL.ImageOps.grayscale(im)
49 | #grayed_im.show()
50 | 
51 | #invert
52 | inverted_im=PIL.ImageOps.invert(im)
53 | #inverted_im.show()
54 | 
55 | 
56 | croped_im=inverted_im.crop((x,y,x+w,y+h))
57 | croped_im.show()
58 | text=pytesseract.image_to_string(croped_im, lang='chi_sim')
59 | print(text)
--------------------------------------------------------------------------------
/extract_subtitles.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import Tuple, List, Iterator
5 | 
6 | from argparse import ArgumentParser, Namespace, FileType
7 | from pathlib import Path
8 | from sys import argv, stderr
9 | from re import findall
10 | from functools import reduce
11 | from progressbar import ProgressBar
12 | 
13 | from json import dumps
14 | 
15 | from libs.fun_utils import let, also, require
16 | from libs.fun_utils import zipWithNext, chunked, collect2, expandRangeStartList
17 | from libs.fun_utils import PatternType, toMapper, printAttributes
18 | from libs.fun_utils import Reducer, AsNoOp
19 | 
20 | from libs.cv_utils import Frame, Rect, BasicCvProcess
21 | from libs.cv_utils import smooth as orig_smooth, relativeChange, stringSimilarity
22 | from libs.cv_utils import cv2VideoProps, cv2NormalWin, cv2WaitKey
23 | from libs.cv_utils import cvScale, cvBlur, cvGrayscale
24 | 
25 | import cv2
26 | from cv2 import UMat, VideoCapture
27 | from pytesseract import image_to_string
28 | 
29 | import numpy as np
30 | from numpy import array, concatenate
31 | from scipy import signal
32 | 
33 | import matplotlib.pyplot as plot
34 | 
35 | # == App Common Logics ==
36 | USE_FEATURE = set([])
37 | FEAT_DEBUG = "--debug"
38 | FEAT_PROGRESS = "--use-progress"
39 | FEAT_SHARP = "--use-sharp"
40 | NOT_COMMON_PUNTUATION = "#$%&\\()*+-/:;<=>@[]^_`{|}" + "—»™€°"
41 | 
42 | feats = USE_FEATURE.__contains__
43 | 
44 | def printDebug(*args, **kwargs):
45 |   if feats(FEAT_DEBUG): print(*args, **kwargs, file=stderr)
46 | 
47 | def stripAll(symbols, text) -> str:
48 |   return text.translate({ord(c):"" for c in symbols})
49 | 
50 | def smooth(a, window_size, window) -> array:
51 |   printDebug(f"smooth [...x{len(a)}], {window_size} {window}")
52 |   return orig_smooth(a, window_size, window)
53 | 
54 | def cvInGrayRange(img: UMat, start: int, end: int) -> UMat:
55 |   return cv2.inRange(img, (start,start,start), (end,end,end))
56 | 
57 | class AsProgress(Reducer):
58 |   def __init__(self, cap: VideoCapture, crop):
59 |     n_frame = cv2VideoProps(cap)[0]
60 |     self.progress = ProgressBar(maxval=n_frame).start()
61 |   def accept(self, index):
62 |     self.progress.update(index)
63 |   def finish(self):
64 |     self.progress.finish()
65 | 
66 | # == Main Algorithm ==
67 | class ExtractSubtitles(BasicCvProcess):
68 |   '''
69 |   Operation of extracting video subtitle area as text,
70 |   - configurable: `cropUMat`, `postprocessUMat`, `onFrameList`, `subtitleShouldReplace`, `postprocessSubtitle`
71 |   - workflow: `runOn`, `solveFrameDifferences`, `findPeaks`, `onFrameList`, `ocrWithLocalMaxima`
72 |   '''
73 |   WIN_LAST_IMAGE = "Last Image"
74 |   WIN_LAST_FRAME = "Last Frame (processed image)"
75 |   WIN_SUBTITLE_RECT = "Subtitle Rect"
76 | 
77 |   def __init__(self, lang: str, is_crop_debug: bool, diff_save_thres: float, window, window_size, chunk_size, path_frames):
78 |     '''
79 |     - lang: language for Tesseract OCR
80 |     - is_crop_debug: show OpenCV capture GUI when processing
81 |     - diff_save_thres: save threshold for differential frame dropper
82 |     - window: windowing kind
83 |     - window_size: window size for numpy algorithms
84 |     - chunk_size: processing chunk size for `ocrWithLocalMaxima()`
85 |     - path_frames: temporary path for frame files
86 |     '''
87 |     self.lang, self.is_crop_debug, self.diff_save_thres = lang, is_crop_debug, diff_save_thres
88 |     super().__init__(window, window_size, chunk_size, path_frames)
89 | 
90 |   def cropUMat(self, mat: UMat, crop: List[Rect], index: int) -> UMat:
91 |     if crop == None: return mat
92 |     cropped_img = crop[0].sliceUMat(mat)
93 |     if self.is_crop_debug:
94 |       cv2.imshow(ExtractSubtitles.WIN_SUBTITLE_RECT, cropped_img)
95 |       cv2WaitKey()
96 |     return cropped_img
97 | 
98 |   def postprocessUMat(self, mat, 
index: int) -> UMat: return mat 99 | 100 | def recognizeText(self, frame: Frame) -> str: 101 | return image_to_string(frame.img, self.lang) 102 | 103 | #v frame & subtitles 104 | def onFrameList(self, frames): 105 | if self.diff_save_thres != None: self.writeFramesThresholded(frames) 106 | 107 | def subtitleShouldReplace(self, a, b) -> bool: 108 | return b != a and b.count("\n") == 0 and stringSimilarity(a, b) > (1/4) 109 | 110 | def postprocessSubtitle(self, text) -> str: 111 | return stripAll(NOT_COMMON_PUNTUATION, text) 112 | 113 | def solveFrameDifferences(self, cap: VideoCapture, crop: List[Rect], fold) -> Iterator[Frame]: 114 | require(cap.isOpened(), "failed to open capture") 115 | postprocess = lambda mat, index: self.postprocessUMat(self.cropUMat(mat, crop, index), index) 116 | if self.is_crop_debug: 117 | cv2NormalWin(ExtractSubtitles.WIN_LAST_IMAGE) 118 | cv2NormalWin(ExtractSubtitles.WIN_LAST_FRAME) 119 | reducer = fold(cap, crop) 120 | 121 | index = 0 122 | prev_frame, curr_frame = None, None 123 | unfinished, img = cap.read() 124 | prev_frame = postprocess(img, 0) #< initial (prev == curr) 125 | def go(i): 126 | nonlocal index 127 | index=i; cap.set(cv2.CAP_PROP_POS_FRAMES,i) 128 | while unfinished: 129 | curr_frame = postprocess(img, index) 130 | if self.is_crop_debug: 131 | cv2.imshow(ExtractSubtitles.WIN_LAST_IMAGE, img) 132 | cv2.imshow(ExtractSubtitles.WIN_LAST_FRAME, curr_frame) #< must have single title, to animate 133 | k = cv2WaitKey() 134 | if k == 'q': break 135 | elif k=='`': breakpoint()#use go(i) to change pos 136 | if curr_frame is not None: # and prev_frame is not None 137 | try: 138 | diff = cv2.absdiff(curr_frame, prev_frame) #< main algorithm goes here 139 | yield Frame(index, curr_frame, np.sum(diff)) 140 | except cv2.error: pass 141 | prev_frame = curr_frame 142 | unfinished, img = cap.read() 143 | index = index + 1 144 | reducer.accept(index) 145 | reducer.finish() 146 | 147 | def postprocessDifferences(self, a: array) -> array: return smooth(a, self.window_size, self.window) 148 | def findPeaks(self, a: array) -> array: return np.asarray(signal.argrelextrema(a, np.greater))[0] #< argrelextrema(_) always (x,) 149 | 150 | def ocrWithLocalMaxima(self, frames, reducer) -> Tuple[array, array]: 151 | ''' 152 | - frames: chunked processing using window, reducing memory usage 153 | - reducer: accept (frame, subtitle) 154 | ''' 155 | frame_list, frame_diffs = collect2(lambda it: (it, it.value), frames) 156 | self.onFrameList(frame_list) 157 | 158 | diff_array = self.postprocessDifferences(array(frame_diffs)) 159 | valid_indices = self.findPeaks(diff_array) 160 | for i in valid_indices: 161 | frame = frame_list[i] 162 | if self.is_crop_debug: 163 | cv2.imshow(ExtractSubtitles.WIN_SUBTITLE_RECT, frame.img) 164 | cv2WaitKey() 165 | subtitle = self.recognizeText(frame) if self.lang!="index" else f"{i}" 166 | reducer.accept(frame, subtitle) 167 | reducer.finish() 168 | return (diff_array, valid_indices) 169 | 170 | class DefaultOcrFold(Reducer): 171 | def __init__(self, ctx, name, on_new_subtitle = print): 172 | self.ctx = ctx; self.on_new_subtitle = on_new_subtitle 173 | self.path = self.ctx.path_frames/name 174 | mkdirIfNotExists(self.path) 175 | self.files = [(self.path/f"{group}.txt").open("a+") for group in ["timeline", "loser"]] 176 | self.out_timeline, self.out_lose_subtitle = self.files 177 | self.last_subtitle = "" 178 | self.frame_index = 0 179 | def accept(self, frame, subtitle): 180 | self.out_timeline.write(f"{frame.no} {dumps(subtitle, 
ensure_ascii=False)}\n")
181 |       if self.ctx.subtitleShouldReplace(self.last_subtitle, subtitle): #< check for repeated subtitles
182 |         self.last_subtitle = subtitle #v also clean-up new subtitle
183 |         self.on_new_subtitle(frame.no, self.ctx.postprocessSubtitle(subtitle))
184 |       else:
185 |         self.out_lose_subtitle.write(f"{frame.no} {subtitle}\n")
186 |       if self.ctx.is_crop_debug:
187 |         cv2.imwrite(str(self.path/f"subtitle_{self.frame_index}.png"), frame.img)
188 |       self.frame_index += 1
189 |     def finish(self): #< in (single chunk) OCR
190 |       for f in self.files: f.flush()
191 |     def finishAll(self):
192 |       for f in self.files: f.close()
193 | 
194 |   def runOn(self, cap: VideoCapture, crop: List[Rect], fold = DefaultOcrFold, name = "default") -> Tuple[array, array]:
195 |     '''
196 |     - cap: video input
197 |     - crop: Rect area for lyric graphics
198 |     - fold: init (self, name)
199 |     '''
200 |     frames = self.solveFrameDifferences(cap, crop, AsProgress if feats(FEAT_PROGRESS) else AsNoOp)
201 |     reducer = fold(self, name)
202 |     processChunk = lambda it: self.ocrWithLocalMaxima(it, reducer)
203 |     diff_array_parts = map(processChunk, chunked(self.chunk_size, frames))
204 |     def concatResults(a, b) -> Tuple[array, array]:
205 |       a0, a1 = a
206 |       b0, b1 = b
207 |       ab0 = concatenate(array([a0, b0],dtype=object))
208 |       ab1 = concatenate(array([a1, b1+len(a0)],dtype=object))
209 |       return (ab0, ab1)
210 |     (diff_array, indices) = reduce(concatResults, diff_array_parts)
211 | 
212 |     reducer.finishAll()
213 |     cv2.destroyAllWindows()
214 |     return (diff_array, indices)
215 | 
216 |   def writeFramesThresholded(self, frames):
217 |     for (a, b) in zipWithNext(frames):
218 |       if b.value == 0: continue #< what if no motion between (last-1)&last ?
219 |       k_change = relativeChange(float(a.value), float(b.value))
220 |       if k_change < self.diff_save_thres: continue
221 |       printDebug(f"[{b.no}]({k_change}) prev: {a.value}, curr: {b.value}")
222 |       cv2.imwrite(self.frameFilepath(a), a.img)
223 | 
224 |   def drawPlot(self, diff_array, indices):
225 |     fig_diff = plot.figure(figsize=(40, 20))
226 |     plot.xlabel("Frame.no")
227 |     plot.ylabel("differences")
228 |     plot.locator_params(nbins=100)
229 |     plot.stem(diff_array, linefmt=":", use_line_collection=True)
230 |     plot.stem(indices, [diff_array[i] for i in indices], use_line_collection=True)
231 |     return fig_diff
232 | 
233 | 
234 | # == Main ==
235 | def makeArgumentParser():
236 |   app = ArgumentParser(
237 |     prog="extract_subtitles",
238 |     description="Extract subtitles using OpenCV / Tesseract OCR with frame difference algorithm")
239 | 
240 |   apg = app.add_argument_group("basic workflow")
241 |   apg.add_argument("video", nargs="+", type=FileType("r"), help="source file to extract from")
242 |   apg.add_argument("-crop", metavar="frame(x,y)[w,h]",
243 |     type=PatternType(r"(\d+)\((\d+),(\d+)\)\[(\d+),(\d+)\]", toMapper(int)),
244 |     default=None, help="crop out subtitles area, improve recognition accuracy")
245 |   apg.add_argument("-filter-code", type=str, default="it", help="(it: cv2.UMat) pipe function")
246 |   apg.add_argument("-lang", type=str, default="eng", help="OCR language for Tesseract `tesseract --list-langs`")
247 |   apg.add_argument("-save-thres", metavar="x.x", type=float, default=None, help="store frames whose relative change exceeds this fixed threshold")
248 | 
249 |   apg1 = app.add_argument_group("misc settings")
250 |   apg1.add_argument("--crop-debug", action="store_true", help="show OpenCV GUI when processing")
251 |   apg1.add_argument("--draw-plot", action="store_true", help="draw difference plot for statistics")
252 |   apg1.add_argument(FEAT_SHARP, action="store_true", help="use non-smooth differential (improves timeline accuracy, slower)")
253 |   apg1.add_argument(FEAT_PROGRESS, action="store_true", help="show progress bar")
254 |   apg1.add_argument(FEAT_DEBUG, action="store_true", help="print debug info")
255 |   apg1.add_argument("--only-images", action="store_true", help="use frame images from --crop-debug as input")
256 |   BasicCvProcess.registerArguments(apg1)
257 |   return app
258 | 
259 | def mkdirIfNotExists(self: Path):
260 |   if not self.exists(): self.mkdir()
261 | 
262 | def makeExtractor(cfg: Namespace, cls_extract=ExtractSubtitles) -> ExtractSubtitles:
263 |   lang, crop, crop_debug, save_thres, window, window_size, chunk_size, frames_dir = cfg.lang, cfg.crop, cfg.crop_debug, cfg.save_thres, cfg.window, cfg.window_size, cfg.chunk_size, cfg.frames_dir
264 |   printAttributes(
265 |     subtitle_language=lang,
266 |     crop=crop,
267 |     save_threshold=save_thres,
268 |     filter_window=window,
269 |     filter_window_size=window_size,
270 |     process_chunk_size=chunk_size,
271 |     frame_directory=frames_dir
272 |   )
273 |   if cfg.use_sharp: #< assign extra config
274 |     USE_FEATURE.add(FEAT_SHARP)
275 |   if cfg.use_progress:
276 |     USE_FEATURE.add(FEAT_PROGRESS)
277 |   if cfg.debug:
278 |     USE_FEATURE.add(FEAT_DEBUG)
279 | 
280 |   extractor = cls_extract(lang, crop_debug, save_thres,
281 |     window, window_size, chunk_size, also(mkdirIfNotExists, Path(frames_dir)) )
282 |   return extractor
283 | 
284 | class EvalFilterExtractSubtitle(ExtractSubtitles):
285 |   def __init__(self, *args, filter_code = "it"):
286 |     ''' filter_code: Python expression over `(it: cv2.UMat)` that yields a `cv2.UMat` '''
287 |     super().__init__(*args)
288 |     self.mat_filter = eval(compile(f"lambda it, i: {filter_code}", "", "eval"))
289 |     self.is_sharp = feats(FEAT_SHARP)
290 |   def postprocessUMat(self, mat, index):
291 |     return self.mat_filter(mat, index)
292 |   def postprocessDifferences(self, a) -> array:
293 |     return (a if self.is_sharp else super().postprocessDifferences(a))
294 | 
295 | class CropEvalFilterExtractSubtitle(EvalFilterExtractSubtitle):
296 |   def cropUMat(self, mat, crop, index) -> UMat:
297 |     cropped_img = crop[index].sliceUMat(mat)
298 |     if self.is_crop_debug:
299 |       cv2.imshow(ExtractSubtitles.WIN_SUBTITLE_RECT, cropped_img)
300 |       cv2WaitKey()
301 |     return cropped_img
302 | 
303 | # == Entry ==
304 | def main(args):
305 |   app = makeArgumentParser()
306 |   cfg = app.parse_args(args)
307 |   cls_extract = lambda *args: (EvalFilterExtractSubtitle if cfg.crop == None or len(cfg.crop) <= 1 else CropEvalFilterExtractSubtitle) (*args, filter_code=cfg.filter_code)
308 |   extractor = makeExtractor(cfg, cls_extract=cls_extract)
309 | 
310 |   def drawPlot(diff_array, indices):
311 |     if not cfg.draw_plot: return
312 |     fig_diff = extractor.drawPlot(diff_array, indices)
313 |     print(indices)
314 |     plot.title(f"Filtered differential sum for {video_name}")
315 |     plot.show()
316 |     fig_diff.savefig(cfg.frames_dir/f"plot_{video_name}.png")
317 | 
318 |   def makeCrops(n_frames):
319 |     #v [(t, x,y, w,h), ...]
320 |     key = lambda it: it[0]; makeRect = lambda it: Rect(*it[1:])
321 |     crops = let(lambda t: [makeRect(t[0])] if len(t) == 1 else expandRangeStartList(n_frames, t, key=key, value=makeRect), cfg.crop)
322 |     if crops != None: require(crops[0] != None, "first crop area must start at frame 0")
323 |     #^ only when multi-crop enabled
324 |     return crops
325 | 
326 |   def readInt(s): return int(findall(r"(\d+)", s)[0])
327 | 
328 |   pathes = map(lambda it: it.name, cfg.video)
329 |   if cfg.only_images:
330 |     extractor.postprocessDifferences = lambda diffs: diffs
331 |     extractor.findPeaks = lambda a: range(0, len(a)) #< required for smooth & peak estim. bypass
332 |     frames = sorted([Frame(readInt(path), cv2.imread(path), 0) for path in pathes])
333 |     print(array([it.no for it in frames])) #< NOTE: I don't know if len(sorted(a)) gets shorter second time access
334 | 
335 |     reducer = ExtractSubtitles.DefaultOcrFold(extractor, "only_images")
336 |     _, indices = extractor.ocrWithLocalMaxima(frames, reducer)
337 |     reducer.finishAll()
338 |     cv2.destroyAllWindows()
339 | 
340 |   for path in pathes:
341 |     video_name = Path(path).name
342 |     printAttributes(video_path=path)
343 |     print("Extracting key frames...")
344 | 
345 |     capture = VideoCapture(path)
346 |     n_frames, fps, w, h = cv2VideoProps(capture)
347 |     printAttributes(video_playback=(n_frames, fps), video_dimens=(w, h))
348 | 
349 |     (diff_array, indices) = extractor.runOn(capture, makeCrops(n_frames), name=video_name)
350 |     capture.release()
351 |     drawPlot(diff_array, indices)
352 | 
353 | if __name__ == "__main__": main(argv[1:]) #< no program name
354 | 
--------------------------------------------------------------------------------
/gui_crop_select.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import Optional, Tuple, Iterator, cast
5 | 
6 | from argparse import ArgumentParser, FileType
7 | 
8 | from cv2 import VideoCapture, UMat, imshow, rectangle
9 | from cv2 import namedWindow, destroyWindow, WINDOW_NORMAL
10 | from cv2 import setMouseCallback, waitKey
11 | from cv2 import EVENT_LBUTTONDOWN, EVENT_LBUTTONUP
12 | from cv2 import CAP_PROP_POS_FRAMES
13 | 
14 | def cv2WaitKey(block_ms = 1) -> str:
15 |   return chr(waitKey(block_ms) & 0xFF)
16 | 
17 | def rectLtrd2Xywh(lt: Tuple[int, int], rd: Tuple[int, int]) -> Tuple[int, int, int, int]:
18 |   x1, y1 = lt
19 |   x2, y2 = rd
20 |   return (x1, y1, x2-x1, y2-y1)
21 | 
22 | def cv2Crop(mat: UMat, ltrd) -> UMat:
23 |   x,y, w,h = rectLtrd2Xywh(*ltrd)
24 |   return mat[y:y+h, x:x+w]
25 | 
26 | # == App ==
27 | def guiSelectionUMat(mat: UMat, title="Rect Selection (r:replot; c:OK)", box_color=(0xFF, 0x00, 0x00), thickness=3) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
28 |   ''' (left_top, right_down) '''
29 |   p_lt, p_rd = None, None
30 |   shot = mat.copy()
31 |   def onMouseEvent(event, x, y, flags, param):
32 |     nonlocal p_lt, p_rd, shot
33 |     if event == EVENT_LBUTTONDOWN:
34 |       p_lt = (x, y)
35 |     elif event == EVENT_LBUTTONUP:
36 |       p_rd = (x, y)
37 |       rectangle(shot, p_lt, p_rd, box_color, thickness)
38 |       imshow(title, shot)
39 |   namedWindow(title, WINDOW_NORMAL)
40 |   setMouseCallback(title, onMouseEvent)
41 |   imshow(title, shot)
42 |   while True:
43 |     key = cv2WaitKey(0)
44 |     if key == 'c' and (p_lt != None and p_rd != None):
45 |       destroyWindow(title)
46 |       return cast(Tuple, (p_lt, p_rd))
47 |     elif key == 'r':
48 |       shot = mat.copy()
49 |       imshow(title, shot)
50 |     elif key == 'q':
51 |       return None
52 | 
53 | def 
selectCropRects(cap: VideoCapture, title = "Video (c:OK; q:finished, <:-; >:=)", title_preview = "Preview", d_seek=5) -> Iterator[Tuple[int, Tuple[int, int, int, int]]]: 54 | ''' position&size (x, y, w, h) ''' 55 | index = 0 56 | ltrd = None 57 | def seek(n): 58 | nonlocal index 59 | index += n 60 | cap.set(CAP_PROP_POS_FRAMES, index) 61 | def handleSeek(key): 62 | if key == '-': seek(-d_seek) 63 | elif key == '=': seek(+d_seek) 64 | def handleSelect(): 65 | nonlocal ltrd 66 | area = guiSelectionUMat(img) 67 | if area != None: 68 | ltrd = area 69 | return (index, rectLtrd2Xywh(*ltrd)) 70 | frame_ops = [lambda: seek(-1), lambda: seek(+1)] 71 | 72 | unfinished, img = cap.read() 73 | while unfinished: 74 | imshow(title, img) 75 | if ltrd != None: imshow(title_preview, cv2Crop(img, ltrd)) 76 | key = cv2WaitKey() 77 | if key == 'c': 78 | select = handleSelect() 79 | if select != None: yield select 80 | elif key == 'q': break 81 | 82 | elif key in '-=': handleSeek(key) 83 | elif key == ' ': 84 | while True: 85 | key1 = cv2WaitKey(0) 86 | if key1 == ' ': break 87 | elif key1 == 'c': 88 | select = handleSelect() 89 | if select != None: yield select 90 | elif key1 in "-=89": 91 | handleSeek(key1) 92 | miniseek = ord(key1) - ord('8') #[89] to mimiseek 93 | if miniseek in range(len(frame_ops)): frame_ops[miniseek]() 94 | unfinished, img = cap.read() 95 | imshow(title, img) 96 | unfinished, img = cap.read() 97 | index += 1 98 | 99 | app = ArgumentParser("gui_crop_select", description="Interactive video crop rect selection") 100 | app.add_argument("video", nargs="+", type=FileType("r"), help="video paths") 101 | 102 | if __name__ == "__main__": 103 | cfg = app.parse_args() 104 | for path in map(lambda it: it.name, cfg.video): 105 | cap = VideoCapture(path) 106 | crops = selectCropRects(cap) 107 | for i, c in crops: 108 | (x,y, w,h) = c 109 | print(f"{i}({x},{y})[{w},{h}]") 110 | cap.release() 111 | -------------------------------------------------------------------------------- /libs/__package__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duangsuse-valid-projects/extract-subtitles/af7f13e2a2f6abf6f70014b0927c3eb1ea333214/libs/__package__.py -------------------------------------------------------------------------------- /libs/cv_utils.py: -------------------------------------------------------------------------------- 1 | from typing import cast, Tuple 2 | from pathlib import Path 3 | 4 | import cv2 5 | from cv2 import UMat, VideoCapture 6 | from cv2 import CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT 7 | 8 | import numpy as np 9 | from numpy import array, convolve 10 | from strsimpy.normalized_levenshtein import NormalizedLevenshtein 11 | 12 | from libs.fun_utils import require 13 | 14 | _levenshtein = NormalizedLevenshtein() 15 | def stringSimilarity(a: str, b: str) -> float: 16 | return _levenshtein.distance(a, b) 17 | 18 | def relativeChange(a: float, b: float) -> float: 19 | return (b - a) / max(a, b) 20 | 21 | # == OpenCV utils == 22 | def cv2NormalWin(title): 23 | cv2.namedWindow(title, cv2.WINDOW_NORMAL) 24 | 25 | def cv2WaitKey(delay_ms = 1) -> str: 26 | return chr(cv2.waitKey(delay_ms) & 0xFF) 27 | 28 | def cv2VideoProps(cap: VideoCapture) -> Tuple[int, int, int, int]: 29 | ''' (count, fps, width, height) ''' 30 | props = (CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT) 31 | return cast(Tuple, tuple(map(int, map(cap.get, props)))) 32 | 33 | class Rect: 
34 | def __init__(self, x,y, w,h): 35 | ''' (pad_left, pad_top, width, height) ''' 36 | self.xywh = (x,y, w,h) 37 | self.form_range = (y,y+h, x,x+w) 38 | def sliceUMat(self, mat: UMat) -> UMat: 39 | (y, y_end, x, x_end) = self.form_range 40 | return mat[y:y_end, x:x_end] 41 | 42 | class Frame: 43 | ''' Class to hold information about each frame ''' 44 | def __init__(self, no, img, value): 45 | self.no, self.img, self.value = no, img, value 46 | def __repr__(self): return f"Frame({self.no}, {self.value})" 47 | def __eq__(self, other): return self.no == other.no 48 | def __hash__(self): return hash(self.no) 49 | def __lt__(self, other): return self.no < other.no 50 | 51 | 52 | smooth_supported_windows = ["flat", "hanning", "hamming", "bartlett", "blackman"] 53 | def smooth(a: array, window_size: int, window = "hanning") -> array: 54 | supported_windows = smooth_supported_windows 55 | if window_size < 3: return a 56 | require(a.ndim == 1, "smooth only accepts 1 dimension arrays") 57 | require(a.size >= window_size, "input vector size must >= window size") 58 | require(window in supported_windows, f"window must in {supported_windows}") 59 | 60 | s = np.r_[2 * a[0] - a[window_size:1:-1], 61 | a, 2 * a[-1] - a[-1:-window_size:-1]] 62 | w = getattr(np, window)(window_size) if window != "flat" else np.ones(window_size, "d") 63 | y = convolve(w / w.sum(), s, mode="same") 64 | return y[window_size -1 : -window_size +1] 65 | 66 | def cvScale(mat: UMat, k_x, k_y) -> UMat: 67 | return cv2.resize(mat, None, fx=k_x, fy=k_y, interpolation=cv2.INTER_AREA) 68 | 69 | def cvGrayscale(mat: UMat) -> UMat: 70 | return cv2.cvtColor(mat, cv2.COLOR_BGR2GRAY) 71 | 72 | def cvBlur(mat: UMat, n = 9) -> UMat: 73 | return cv2.GaussianBlur(mat, (n, n), 0.0) 74 | 75 | class BasicCvProcess: 76 | ''' Helper class for simple CV programs (window+size, chunk_size, path_frames) ''' 77 | def __init__(self, window: str, window_size: int, chunk_size: int, path_frames: Path): 78 | require(chunk_size > window_size, f"chunk size({chunk_size}) must fill(≥) window({window_size})") 79 | require(path_frames.is_dir(), f"{path_frames} must be dir") 80 | self.window, self.window_size, self.chunk_size, self.path_frames = window, window_size, chunk_size, path_frames 81 | @staticmethod 82 | def registerArguments(ap): 83 | ap.add_argument("--window", type=str, default="hamming", help=f"filter window, one of {smooth_supported_windows}") 84 | ap.add_argument("--window-size", type=int, default=30, help="matrix filtering window size") 85 | ap.add_argument("--chunk-size", type=int, default=300, help="processing frame chunk size") 86 | ap.add_argument("--frames-dir", type=Path, default=Path("frames/"), help="directory to store the processed frames") 87 | def frameFilepath(self, it: Frame) -> str: 88 | return str(self.path_frames/f"frame_{it.no}.jpg") 89 | -------------------------------------------------------------------------------- /libs/fun_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, TypeVar, Generic, Any 2 | from typing import Iterable, Iterator, List, Tuple, Dict 3 | 4 | from re import findall 5 | from itertools import chain, repeat, islice 6 | from sys import stderr 7 | 8 | T = TypeVar("T") 9 | R = TypeVar("R", contravariant=True) 10 | K = TypeVar("K") 11 | V = TypeVar("V") 12 | 13 | # == Normal function utils == 14 | identity = lambda x: x 15 | noOp = lambda x: () 16 | 17 | def let(transform: Callable[[T], R], x: T) -> R: 18 | return transform(x) if x != None else 
None 19 | 20 | def also(op: Callable[[T], Any], x: T) -> T: 21 | op(x) 22 | return x 23 | 24 | def require(p: bool, message: str): 25 | if not p: raise ValueError(message) 26 | 27 | def toMapper(transform: Callable[[T], R]) -> Callable[[Iterable[T]], List[R]]: 28 | return lambda xs: [transform(x) for x in xs] 29 | 30 | def zipWithNext(xs: List[T]) -> Iterator[Tuple[T, T]]: 31 | require(len(xs) >= 2, f"len {len(xs)} is too short (< 2)") 32 | for i in range(1, len(xs)): 33 | yield (xs[i-1], xs[i]) 34 | 35 | def chunked(n: int, xs: Iterator[T]) -> Iterator[T]: 36 | while True: 37 | try: #< must return when inner gen finished 38 | first = next(xs) 39 | except StopIteration: return 40 | chunk = islice(xs, n) 41 | yield chain((first,) , chunk) 42 | 43 | def collect2(selector2: Callable[[T], Tuple[R, R]], xs: Iterable[T]) -> Tuple[List[R], List[R]]: 44 | bs, cs = [], [] 45 | for x in xs: 46 | b, c = selector2(x) 47 | bs.append(b); cs.append(c) 48 | return (bs, cs) 49 | 50 | def expandRangeStartList(size: int, entries: T, key: Callable[[T], int] = lambda it: it[0], value: Callable[[T], V] = lambda it: it[1]) -> List[V]: 51 | sorted_entries = sorted(entries, key=key) 52 | items = list(repeat(None, size)) 53 | def assignRange(start, stop, value): 54 | items[start:stop] = repeat(value, stop - start) 55 | for (a, b) in zipWithNext(sorted_entries): 56 | assignRange(key(a), key(b), value(a)) 57 | last_item = sorted_entries[-1] 58 | assignRange(key(last_item), size, value(last_item)) 59 | return items 60 | 61 | class PatternType: 62 | def __init__(self, regex, transform = identity): 63 | self.regex, self.transform = regex, transform 64 | def __repr__(self): return f"PatternType({self.regex})" 65 | def __call__(self, text): 66 | groups = findall(self.regex, text) 67 | return list(map(self.transform, groups)) 68 | 69 | 70 | def snakeSplit(text): return text.strip().split("_") 71 | def titleCased(texts, sep = " "): return sep.join(map(str.capitalize, texts)) 72 | 73 | def printAttributes(fmt = lambda k, v: f"[{titleCased(snakeSplit(k))}] {v}", sep = "\n", file = stderr, **kwargs): 74 | entries = [fmt(k, v) for (k, v) in kwargs.items()] 75 | print(sep.join(entries), file=file) 76 | 77 | class Reducer(Generic[T, R]): 78 | def __init__(self): pass 79 | def accept(self, value: T): pass 80 | def finish(self) -> R: pass 81 | def reduce(self, xs: Iterable[T]) -> R: 82 | for x in xs: 83 | self.accept(x) 84 | return self.finish() 85 | 86 | class EffectReducer(Reducer[T, R]): 87 | ''' `Reducer` defined using makeBase/onAccept ''' 88 | def __init__(self): 89 | self._base = self._makeBase() 90 | @classmethod #< used in ctor 91 | def _makeBase(cls): pass 92 | def _onAccept(self, base, value): pass 93 | def accept(self, value): 94 | self._onAccept(self._base, value) 95 | def finish(self): 96 | return self._base 97 | 98 | class AsNoOp(Reducer[Any, None]): 99 | def __init__(self, *args): pass 100 | 101 | class AsDict(EffectReducer[Tuple[K, V], Dict[K, V]]): 102 | @classmethod 103 | def _makeBase(cls): return dict() 104 | def _onAccept(self, base, value): 105 | (k, v) = value 106 | base[k] = v 107 | -------------------------------------------------------------------------------- /libs/lrc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from typing import Tuple 4 | from re import findall 5 | 6 | SEC_MS = 1000 7 | MIN_MS = 60*SEC_MS 8 | HOUR_MS = 60*MIN_MS 9 | 10 | def time_just(v: float, n = 2, pad = '0') -> str: 11 | text = str(int(v)) 12 | return 
text.rjust(n, pad) 13 | 14 | def millis2MinSecMs(ms) -> Tuple[int, int, int]: 15 | mins, r = divmod(ms, MIN_MS) 16 | secs, r = divmod(r, SEC_MS) 17 | return (mins, secs, r) 18 | 19 | def millis2HourMinSecMs(ms) -> Tuple[int, int, int, int]: 20 | hrs, r = divmod(ms, HOUR_MS) 21 | mins, r = divmod(r, MIN_MS) 22 | secs, r = divmod(r, SEC_MS) 23 | return (hrs, mins, secs, r) 24 | 25 | def makeConvertorFps2Ms(fps): 26 | ''' creates a frame-no to millis convertor ''' 27 | return lambda no: (float(no) / fps) * SEC_MS 28 | 29 | 30 | def millis2LrcTime(ms) -> str: 31 | mins, secs, r = millis2MinSecMs(ms) 32 | return f"{time_just(mins)}:{time_just(secs)}.{int(r)}" 33 | 34 | def dumpsLrc(ms, text) -> str: 35 | return f"[{millis2LrcTime(ms)}] {text}" 36 | 37 | def loadsLrc(text) -> Tuple[int, str]: 38 | mm, ss, rrr, lyric = findall(r"^\[(\d{2}):(\d{2}).(\d+)\] ?(.+)$", text)[0] 39 | mins, secs, r = map(int, (mm, ss, rrr) ) 40 | return (mins*MIN_MS + secs*SEC_MS + r, lyric) 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python>=4.2 2 | numpy>=1.0 3 | scipy>=1 4 | pytesseract>=0.3 5 | strsimpy>=0.1 6 | matplotlib>=3 7 | 8 | progressbar>=0.3 9 | -------------------------------------------------------------------------------- /timeline_ops.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from sys import argv, stdin, stderr 5 | from re import findall 6 | from json import dumps, loads 7 | 8 | from libs.fun_utils import zipWithNext 9 | from libs.cv_utils import stringSimilarity 10 | from libs.lrc import makeConvertorFps2Ms, dumpsLrc, millis2HourMinSecMs, time_just 11 | 12 | from os import environ 13 | def env(name, transform, default): return default if name not in environ else transform(environ[name]) 14 | 15 | pipeSubtitle = env("PIPE", lambda code: eval(f"lambda it: {code}"), lambda it: it) 16 | 17 | class Record: 18 | ''' Value record on the time line ''' 19 | def __init__(self, start: int, end: int, value): 20 | self.start, self.end, self.value = start, end, value 21 | def __str__(self): 22 | return f"{self.start}-{self.end} {dumps(self.value, ensure_ascii=False)}" 23 | @staticmethod 24 | def loads(line): 25 | start, end, text = findall(r"^(\d+)-(\d+) (.*)$", line)[0] 26 | return Record(int(start), int(end), loads(text)) 27 | def mapTime(self, transform): 28 | return Record(transform(self.start), transform(self.end), self.value) 29 | 30 | class Timeline: 31 | def __init__(self, time: int, value): 32 | self.time, self.value = time, value 33 | def __str__(self): 34 | return f"{self.time} {dumps(self.value, ensure_ascii=False)}" 35 | @staticmethod 36 | def loads(line): 37 | time, text = findall(r"^(\d+) (.*)$", line)[0] 38 | return [int(time), loads(text)] 39 | 40 | #^ Two data&representations: Record(+end) and timeline(time, text) 41 | 42 | def openTimeline(path): 43 | return [Timeline.loads(ln) for ln in open(path, "r").readlines()] 44 | 45 | def mergeDebug(path): 46 | for (a, b) in zipWithNext(openTimeline(path)): 47 | ta, sa = a; tb, sb = b 48 | sa1, sb1 = map(pipeSubtitle, (sa, sb)) 49 | v = stringSimilarity(sa1, sb1) 50 | print(f"{ta}-{tb} {str(v)[0:4].ljust(4, '0')} {sa1} | {sb1}") 51 | 52 | def merge(path, strsim_bound_max, consume = print): 53 | bound_max = float(strsim_bound_max) 54 | (last_text, start, end) = (pipeSubtitle(""), 0, 0) 55 | onConsume = 
lambda: consume(Record(start, end, last_text))
56 |   for (time, text) in openTimeline(path):
57 |     text1 = pipeSubtitle(text)
58 |     if stringSimilarity(last_text, text1) < bound_max:# or last_text.isspace():
59 |       last_text = text1
60 |       end = time #< renew end
61 |     else:
62 |       onConsume()
63 |       (last_text, start, end) = (text1, time, time)
64 |   onConsume()
65 | 
66 | lines = lambda s: iter(s.readline, "")
67 | 
68 | def stdinSimplify():
69 |   for line in lines(stdin):
70 |     rec = Record.loads(line)
71 |     print(Timeline(rec.start, rec.value))
72 | 
73 | #v Lyric and formats
74 | def millis2SrtTime(ms, ms_sep = ",") -> str:
75 |   hrs, mins, secs, r = millis2HourMinSecMs(ms)
76 |   return f"{time_just(hrs)}:{time_just(mins)}:{time_just(secs)}{ms_sep}{time_just(r, 3)}"
77 | 
78 | def makeLyricFormater(fmt):
79 |   if fmt == "lrc": return lambda rec: dumpsLrc(rec.start, rec.value)
80 |   elif fmt == "srt":
81 |     fTime = millis2SrtTime
82 |     index = 1
83 |     def _nextRecord(rec):
84 |       nonlocal index
85 |       line = f"{index}\n{fTime(rec.start)} --> {fTime(rec.end)}\n{rec.value}\n"
86 |       index += 1
87 |       return line
88 |     return _nextRecord
89 |   else: raise ValueError(f"unknown format {fmt}")
90 | 
91 | def stdinToLRC(fps, fmt = "lrc"):
92 |   ms = makeConvertorFps2Ms(float(fps))
93 |   accept = makeLyricFormater(fmt)
94 |   for line in lines(stdin):
95 |     rec = Record.loads(line).mapTime(ms)
96 |     lrc = accept(rec)
97 |     print(lrc)
98 | 
99 | handler = { "merge-debug": mergeDebug, "merge": merge, "simplify": stdinSimplify, "to-lrc": stdinToLRC }
100 | 
101 | def main(args):
102 |   if len(args) == 0:
103 |     tl = "timeline_file"
104 |     print(f"Usage: merge-debug <{tl}> | merge <{tl}> <strsim_max> | simplify | to-lrc <fps> (srt)", file=stderr)
105 |     return
106 |   key_op = args[0]
107 |   handler[key_op](*args[1:])
108 | 
109 | if __name__ == "__main__": main(argv[1:])
--------------------------------------------------------------------------------
/vcat_subtitle_imgs.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import NewType, Tuple, Dict
5 | from PIL.Image import Image
6 | from PIL import Image as Pillow
7 | 
8 | LTRD = NewType("LTRD", Tuple[int,int,int,int])
9 | 
10 | def imagePixels(img):
11 |   for y in range(0, img.height):
12 |     for x in range(0, img.width):
13 |       yield img.getpixel((x, y))
14 | 
15 | def channelHistogram(img) -> Tuple:
16 |   n_channels = Pillow.getmodebands(img.mode)
17 |   hist = img.histogram()
18 |   return tuple(hist[i:i+256] for i in range(0, n_channels*256, 256))
19 | 
20 | def count(xs): return sum(map(lambda _: 1, xs))
21 | 
22 | 
23 | def vConcateImages(path_imgs: Dict[str,Image]) -> Tuple[Image, Dict[str,LTRD]]:
24 |   width = max(map(lambda it: it.width, path_imgs.values()))
25 |   height = sum(map(lambda it: it.height, path_imgs.values()))
26 |   newImage = Pillow.new("RGBA", (width, height))
27 |   iter_img = iter(path_imgs.items())
28 |   areas = {}
29 |   y = 0
30 |   while y < height:
31 |     (path, img) = next(iter_img)
32 |     box = (0,y, img.width,y+img.height)
33 |     areas[path] = box
34 |     newImage.paste(img, box)
35 |     y += img.height
36 |   return (newImage, areas)
37 | 
38 | def saveCropImages(img: Image, areas: Dict[str,LTRD]):
39 |   for (k, v) in areas.items():
40 |     area = img.crop(v)
41 |     area.save(k)
42 |     onAreaWrote(k, area)
43 | 
44 | 
45 | from re import findall
46 | from json import dump, load
47 | def main(args):
48 |   if len(args) == 0:
49 |     print("Usage: dst src... 
| unpack dst\n(ON_AREA_WROTE=lambda path, img: )") 50 | return 51 | if args[0] == "unpack": 52 | path = args[1] 53 | areas = load(open(f"{path}.json", "r")) 54 | saveCropImages(Pillow.open(path), areas) 55 | else: 56 | dst = args[0] 57 | srcs = sorted(args[1:], key=lambda s: int(findall(r"(\d+)", s)[0])) 58 | print(f"{dst} << {srcs[:5]}..{srcs[-5:]} ({len(srcs)})") 59 | 60 | (img, areas) = vConcateImages({path: Pillow.open(path) for path in srcs}) 61 | img.save(dst); dump(areas, open(f"{dst}.json", "w+")) 62 | 63 | import os 64 | def defaultOnAreaWrote(path, img, mark_range=range(20, 1000), show=lambda n, r: n > r.stop): 65 | (r,_,_) = channelHistogram(img)[0:3] 66 | if r[0xFF] > 0: 67 | n_marks = count(filter(lambda it: it[0:3] == (0xFF,0,0), imagePixels(img))) 68 | if n_marks not in mark_range: return 69 | 70 | if show(n_marks, mark_range): img.show(title = f"Removed {path}") 71 | print(f"Removing {path} (redmarks {n_marks})") 72 | os.remove(path) 73 | 74 | onAreaWrote = eval("lambda path, img: " + (os.environ.get("ON_AREA_WROTE") or "defaultOnAreaWrote(path, img)")) 75 | 76 | from sys import argv 77 | if __name__ == "__main__": main(argv[1:]) 78 | --------------------------------------------------------------------------------
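
For orientation: `extract_subtitles.py` writes `frames/<video>/timeline.txt` as lines of `frame_no "subtitle"` (see `DefaultOcrFold.accept`), `timeline_ops.py merge` folds similar neighbouring lines into `start-end "subtitle"` records, and `libs/lrc.py` converts frame numbers into timestamps. Below is a minimal sketch of that last conversion, driven directly from Python instead of the shell pipeline in the README; the 24 fps rate and the `frames/a.flv/timeline.txt` path are placeholder assumptions:

```python
# Hedged sketch: timeline.txt -> LRC lines, using only helpers shown above.
from json import loads
from re import findall

from libs.lrc import makeConvertorFps2Ms, dumpsLrc

to_ms = makeConvertorFps2Ms(24.0)  # frame-no -> milliseconds (24 fps assumed)

with open("frames/a.flv/timeline.txt") as f:  # written by DefaultOcrFold
    for line in f:
        no, text = findall(r"^(\d+) (.*)$", line)[0]  # same regex as Timeline.loads
        print(dumpsLrc(to_ms(int(no)), loads(text)))  # e.g. [00:05.208] some text
```

Unlike `timeline_ops.py merge | to-lrc`, this skips the `stringSimilarity`-based merging of near-duplicate OCR lines, so repeated subtitles stay repeated.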
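
The detection core (`solveFrameDifferences` → `postprocessDifferences` → `findPeaks`) boils down to smoothing the per-frame difference sums and taking local maxima. A tiny self-contained demo of that numeric step on synthetic data (the series below is made up; real input comes from `cv2.absdiff` sums):

```python
# Hedged sketch: the smoothing + local-maxima step on fake frame differences.
import numpy as np
from scipy import signal

from libs.cv_utils import smooth

rng = np.random.default_rng(0)
diffs = rng.normal(10.0, 1.0, 160)  # background frame-to-frame noise
diffs[40] += 30; diffs[120] += 30   # two simulated subtitle changes

sm = smooth(diffs, 30, "hamming")   # same window kind/size defaults as BasicCvProcess
peaks = signal.argrelextrema(sm, np.greater)[0]
print(peaks)  # local maxima; should land near 40 and 120
```

With `--use-sharp`, `EvalFilterExtractSubtitle.postprocessDifferences` skips the smoothing, trading noise robustness for frame-accurate peak positions.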