├── .gitignore
├── LICENSE
├── README.md
├── deprecated
│   ├── extract_subtitles_old.py
│   └── ocr.py
├── extract_subtitles.py
├── gui_crop_select.py
├── libs
│   ├── __package__.py
│   ├── cv_utils.py
│   ├── fun_utils.py
│   └── lrc.py
├── requirements.txt
├── timeline_ops.py
└── vcat_subtitle_imgs.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # pyCharm
107 | .idea/
108 | 
109 | # examples
110 | image_examples/
111 | video_examples/
112 | 
113 | frames/
114 | *.mp4
115 | *.mp3
116 | *.mkv
117 | *.lrc
118 | *.srt
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 Xiao Tian
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Subtitles Extraction
2 | 
3 | Key-frame extraction is adapted from [Amanpreet Walia](https://github.com/amanwalia92)'s work.
4 | 
5 | This project extracts subtitles from videos. First, key frames are extracted from the video; then the subtitle area of each frame is cropped, and the text is recognized by OCR.
6 | 
7 | ## Getting Started
8 | 
9 | ### Install the following dependencies
10 | 
11 | - __OpenCV-Python__ (used for basic video processing, e.g. read-frame-stream, crop, frame-diff, processing-gui)
12 | - __PyTesseract__ (only its `image_to_string(img, lang)` is used)
13 | - NumPy (`smooth` filter) (find it [here](http://www.lfd.uci.edu/~gohlke/pythonlibs/#numpy))
14 | - SciPy (`signal.argrelextrema`)
15 | - StrsimPy (`NormalizedLevenshtein` string similarity)
16 | - Matplotlib (draws the frame-differences stem plot)
17 | - ProgressBar
18 | 
19 | Install missing dependencies first using `pip install -r requirements.txt`
20 | 
21 | ### Install Tesseract OCR
22 | 
23 | [Download](https://github.com/UB-Mannheim/tesseract/wiki) it and (try to) run it; select language support during install if you want (check with `tesseract --list-langs`).
24 | 
25 | ### Run
26 | 
27 | ```
28 | λ python extract_subtitles.py
29 | 
30 | ./extract_subtitles.py -crop '0(907,940)[101,77]' -lang eng a.flv ; ./timeline_ops.py merge frames/a.flv/timeline.txt 0|./timeline_ops.py to-lrc 1 srt >a.srt
31 | # for srt/Audacity ops, refer to my GH:mkey/yy/atag2srt
32 | # and GH:tv/arecog.sh , GH:MontagePy/_1c
33 | # export OPENCV_OPENCL_DEVICE='Clover' # if VideoReader fails
34 | 
35 | ./extract_subtitles.py -crop '0(883,484)[134,120]' -lang index --chunk-size 600 --crop-debug a.flv
36 | ```
37 | 
38 | `./montage1_c.py -font /usr/share/fonts/wenquanyi/*/wqy-zenhei.ttc a.mp4 --subtitle b.srt -font-size 25 --mon-background '#6e6c39' --subtitle-placeholder 好 -spacing :5,3 -key-color '#000000' --key-thres 3`
39 | 
40 | ## License
41 | 
42 | This project is licensed under the MIT License - see [LICENSE](LICENSE) for details.
43 | 
--------------------------------------------------------------------------------
/deprecated/extract_subtitles_old.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import Tuple
5 | 
6 | from argparse import ArgumentParser, FileType
7 | from re import findall
8 | from pathlib import Path
9 | from os import remove
10 | from progressbar import ProgressBar
11 | 
12 | import cv2
13 | from cv2 import CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT
14 | from pytesseract import image_to_string
15 | 
16 | import numpy as np
17 | from scipy.signal import argrelextrema
18 | 
19 | import matplotlib.pyplot as plot
20 | 
21 | 
22 | class PatternType:
23 |   def __init__(self, regex, transform = lambda x: x):
24 |     self.regex, self.transform = regex, transform
25 |   def __call__(self, string):
26 |     return [self.transform(group) for group in findall(self.regex, string)]
27 |   def __repr__(self): return f"PatternType({self.regex})"
28 | 
29 | def toMapper(transform):
30 |   return lambda xs: 
[transform(x) for x in xs] 31 | 32 | def zipWithNext(xs): 33 | assert len(xs) >= 2, f"{len(xs)} too short (< 2)" 34 | for i in range(1, len(xs)): 35 | yield (xs[i-1], xs[i]) 36 | 37 | def require(p, message): 38 | if not p: raise(ValueError(message)) 39 | 40 | def snakeSplit(text): return text.strip().split("_") 41 | def titleCased(texts, sep=" "): return sep.join(map(str.capitalize, texts)) 42 | 43 | def printAttributes(fmt=lambda k, v: f"[{titleCased(snakeSplit(k))}] {v}", sep="\n", **kwargs): 44 | text = sep.join([fmt(k, v) for (k, v) in kwargs.items()]) 45 | print(text) 46 | 47 | def printResult(op): 48 | def _invoke(*args, **kwargs): 49 | res = op(*args, **kwargs) 50 | print(res); return res 51 | return _invoke 52 | 53 | 54 | 55 | def smooth(x, len_window, window = "hanning") -> np.array: 56 | supported_windows = ["flat", "hanning", "hamming", "bartlett", "blackman"] 57 | print("smooth", len(x), len_window) 58 | if len_window < 3: return x 59 | require(x.ndim == 1, "smooth only accepts 1 dimension arrays") 60 | require(x.size >= len_window, "input vector must >= window size") 61 | 62 | require(window in supported_windows, f"window must of {supported_windows}") 63 | 64 | s = np.r_[2 * x[0] - x[len_window:1:-1], 65 | x, 2 * x[-1] - x[-1:-len_window:-1]] 66 | w = getattr(np, window)(len_window) if window != "flat" else np.ones(len_window, "d") 67 | y = np.convolve(w / w.sum(), s, mode="same") 68 | return y[len_window -1 : -len_window +1] 69 | 70 | def cv2NormalWin(title): 71 | cv2.namedWindow(title, cv2.WINDOW_NORMAL) 72 | 73 | def cv2WaitKey(key_code, delay_ms = 1) -> bool: 74 | require(len(key_code) == 1, f"{repr(key_code)} must be single char") 75 | return cv2.waitKey(delay_ms) & 0xFF == ord(key_code) 76 | 77 | def cv2VideoProps(cap: cv2.VideoCapture, props = (CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT)) -> Tuple[int, ...]: 78 | ''' (count, fps, width, height) ''' 79 | return tuple(map(int, map(cap.get, props))) 80 | 81 | class Frame: 82 | ''' Class to hold information about each frame ''' 83 | def __init__(self, no, img, value): 84 | self.no, self.img, self.value = no, img, value 85 | 86 | def __lt__(self, other): return self.no < other.no 87 | def __eq__(self, other): return self.no == other.no 88 | 89 | global app_cfg 90 | 91 | def inFramesDir(name) -> str: return str(app_cfg.frames_dir/name) 92 | filename_frame = lambda it: f"frame_{it.no}.jpg" 93 | 94 | WIN_SUBTITLE_RECT = "Subtitle Rect" 95 | WIN_LAST_FRAME = "Last Frame" 96 | 97 | def postprocessArgs(): 98 | if app_cfg.crop != None: 99 | ((x,y), (w,h)) = app_cfg.crop 100 | app_cfg.crop_cfg = ((y, y+h), (x, x+w)) 101 | 102 | def recognizeText(name) -> str: 103 | img = cv2.imread(inFramesDir(name)) 104 | if app_cfg.crop != None: 105 | ((y, y_end), (x, x_end)) = app_cfg.crop_cfg 106 | croped_img = img[y:y_end, x:x_end] 107 | if app_cfg.crop_debug: 108 | cv2.imshow(WIN_SUBTITLE_RECT, croped_img) 109 | cv2.waitKey(1) 110 | return image_to_string(croped_img, app_cfg.lang) 111 | else: 112 | return image_to_string(img, app_cfg.lang) 113 | 114 | @printResult 115 | def relativeChange(a: float, b: float) -> float: return (b - a) / max(a, b) 116 | 117 | 118 | def solveFrameDifferences(cap: cv2.VideoCapture, on_frame = lambda x: ()) -> Tuple[list, list]: 119 | frames, frame_diffs = [], [] 120 | 121 | index = 0 122 | prev_frame, curr_frame = None, None 123 | 124 | unfinished, img = cap.read() 125 | prev_frame = img #< initial (prev == curr) 126 | cv2NormalWin(WIN_LAST_FRAME) 127 | (n_frame, _, _, _) = 
cv2VideoProps(cap)
128 |   progress = ProgressBar(maxval=n_frame).start()
129 |   while unfinished:
130 |     curr_frame = cv2.cvtColor(img, cv2.COLOR_BGR2LUV) #luv
131 |     if curr_frame is not None: # and prev_frame is not None
132 |       diff = cv2.absdiff(curr_frame, prev_frame) #< main logic goes here
133 |       count = np.sum(diff)
134 |       frame_diffs.append(count)
135 |       frame = Frame(index, img, count)
136 |       #frames.append(frame)
137 |       on_frame(curr_frame)
138 |     prev_frame = curr_frame
139 |     index = index + 1
140 |     progress.update(index)
141 |     unfinished, img = cap.read()
142 |     if app_cfg.crop_debug:
143 |       cv2.imshow(WIN_LAST_FRAME, prev_frame) #< must have single name, to animate
144 |       if cv2WaitKey('q'): break
145 |   progress.finish()
146 |   return (frames, frame_diffs)
147 | 
148 | def sortTopFramesDsc(frames, n_top_frames):
149 |   ''' sort the list in descending order '''
150 |   frames.sort(key=lambda it: it.value, reverse=True)
151 |   for keyframe in frames[:n_top_frames]:
152 |     name = filename_frame(keyframe)
153 |     cv2.imwrite(inFramesDir(name), keyframe.img)
154 | 
155 | def writeFramesThreshold(frames):
156 |   for (a, b) in zipWithNext(frames):
157 |     if relativeChange(float(a.value), float(b.value)) < app_cfg.thres: continue
158 |     #print("prev_frame:"+str(frames[i-1].value)+" curr_frame:"+str(frames[i].value))
159 |     name = filename_frame(a)
160 |     cv2.imwrite(inFramesDir(name), a.img)
161 | 
162 | def ocrWithLocalMaxima(frames, frame_diffs, on_new_subtitle = print) -> np.array:
163 |   diff_array = np.array(frame_diffs)
164 |   sm_diff_array = smooth(diff_array, app_cfg.window_size)
165 |   frame_indices = np.subtract(np.asarray(argrelextrema(sm_diff_array, np.greater))[0], 1)
166 |   last_subtitle = ""
167 |   for frame in map(frames.__getitem__, frame_indices):
168 |     name = filename_frame(frame)
169 |     cv2.imwrite(inFramesDir(name), frame.img)
170 |     subtitle = recognizeText(name)
171 |     if subtitle != last_subtitle:
172 |       last_subtitle = subtitle #< Check for repeated subtitles
173 |       on_new_subtitle(frame.no, subtitle)
174 |     remove(inFramesDir(name)) #< Delete recognized frame images
175 |   return sm_diff_array
176 | 
177 | def drawPlot(diff_array):
178 |   plot.figure(figsize=(40, 20))
179 |   plot.locator_params(numticks=100)
180 |   plot.stem(diff_array, use_line_collection=True)
181 |   plot.savefig(inFramesDir("plot.png"))
182 | 
183 | app = ArgumentParser(prog="extract_subtitles", description="extract subtitles using OCR with frame difference algorithm")
184 | apg = app.add_argument_group("basic workflow")
185 | apg.add_argument("video", type=FileType("r"), help="source file to extract from")
186 | apg.add_argument("-sort-top-dsc", metavar="n", nargs="?", type=int, help="make top frames and data descending")
187 | apg.add_argument("-thres", metavar="x.x", nargs="?", type=float, help="fixed threshold value (float)")
188 | apg.add_argument("-no-local-maxima", action="store_true", help="don't apply local maxima criteria")
189 | 
190 | '''
191 | Using crop mode (cropping out the subtitle area) can greatly improve recognition accuracy,
192 | but you need to manually adjust the crop area by modifying the values of the cropper parameters (x, y, w, h).
193 | To debug the appropriate values, set -crop-debug to show the cropped result.
194 | '''
195 | 
196 | apg1 = app.add_argument_group("misc settings")
197 | regex_tuple = PatternType(r"\((\d+),(\d+)\)", toMapper(int))
198 | apg1.add_argument("-crop", metavar="(x,y)(w,h)", type=regex_tuple, default=None, help="crop out subtitles area, improve recognition accuracy")
199 | apg1.add_argument("--crop-debug", action="store_true", help="show cropped result if available")
200 | 
201 | apg1.add_argument("-lang", type=str, default="chi_sim", help="OCR language for tesseract engine (tesseract --list-langs)")
202 | apg1.add_argument("-draw-plot", action="store_true", help="draw plot for statistics")
203 | apg1.add_argument("-frames-dir", type=Path, default=Path("frames/"), help="directory to store the processed frames")
204 | apg1.add_argument("-window-size", type=int, default=13, help="smoothing window size")
205 | 
206 | if __name__ == "__main__":
207 |   app_cfg = app.parse_args()
208 |   cfg = app_cfg
209 |   postprocessArgs()
210 |   printAttributes(
211 |     video_path=cfg.video.name,
212 |     frame_directory=cfg.frames_dir,
213 |     subtitle_language=cfg.lang,
214 |     crop=cfg.crop
215 |   )
216 |   print("Extracting key frames...")
217 | 
218 |   capture = cv2.VideoCapture(cfg.video.name)
219 |   cap_props = cv2VideoProps(capture)
220 |   printAttributes(video_props=cap_props)
221 |   (frames, frame_diffs) = solveFrameDifferences(capture)
222 |   capture.release()
223 |   cv2.destroyAllWindows()
224 |   if cfg.sort_top_dsc != None: sortTopFramesDsc(frames, cfg.sort_top_dsc)
225 |   if cfg.thres != None: writeFramesThreshold(frames)
226 |   diff_array = ocrWithLocalMaxima(frames, frame_diffs) if not cfg.no_local_maxima else None
227 |   if cfg.draw_plot: drawPlot(diff_array)
--------------------------------------------------------------------------------
/deprecated/ocr.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | import cv2
4 | import operator
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 | import sys
8 | from PIL import Image
9 | import pytesseract
10 | import PIL.ImageOps
11 | 
12 | #set cropper parameters
13 | # 我不是药神 100 410 600 40
14 | # minecraft 80,285,550,38
15 | # 狗十三 150 430 600 40
16 | #left_padding
17 | x=150
18 | #top_padding
19 | y=430
20 | #window_width
21 | w=600
22 | #window_height
23 | h=40
24 | 
25 | 
26 | imagePath = sys.argv[1]
27 | 
28 | # src=cv2.imread(imagePath)
29 | # cv2.imshow("Before", src)
30 | # cv2.cvtColor(src,cv2.COLOR_BGR2GRAY)
31 | 
32 | im=Image.open(imagePath)
33 | # image=np.asarray(im)
34 | # height,width=image.size
35 | # print(height)
36 | # image2=image.copy()
37 | # for i in range(height):
38 | #   for j in range(width):
39 | #     image2[i,j]=(255-image[i,j])
40 | 
41 | # image2.show()
42 | 
43 | #contrast
44 | #contrasted_im=PIL.ImageOps.autocontrast(im, cutoff=0)
45 | #contrasted_im.show()
46 | 
47 | #gray
48 | #grayed_im=PIL.ImageOps.grayscale(im)
49 | #grayed_im.show()
50 | 
51 | #invert
52 | inverted_im=PIL.ImageOps.invert(im)
53 | #inverted_im.show()
54 | 
55 | 
56 | croped_im=inverted_im.crop((x,y,x+w,y+h))
57 | croped_im.show()
58 | text=pytesseract.image_to_string(croped_im, lang='chi_sim')
59 | print(text)
--------------------------------------------------------------------------------
/extract_subtitles.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import Tuple, List, Iterator
5 | 
6 | from argparse import ArgumentParser, Namespace, FileType
7 | from pathlib import Path
8 | from sys import argv, stderr
9 | from re import findall
10 | from functools import reduce
11 | from progressbar import ProgressBar
12 | 
13 | from json import dumps
14 | 
15 | from libs.fun_utils import let, also, require
16 | from libs.fun_utils import zipWithNext, chunked, collect2, expandRangeStartList
17 | from libs.fun_utils import PatternType, toMapper, printAttributes
18 | from libs.fun_utils import Reducer, AsNoOp
19 | 
20 | from libs.cv_utils import Frame, Rect, BasicCvProcess
21 | from libs.cv_utils import smooth as orig_smooth, relativeChange, stringSimilarity
22 | from libs.cv_utils import cv2VideoProps, cv2NormalWin, cv2WaitKey
23 | from libs.cv_utils import cvScale, cvBlur, cvGrayscale
24 | 
25 | import cv2
26 | from cv2 import UMat, VideoCapture
27 | from pytesseract import image_to_string
28 | 
29 | import numpy as np
30 | from numpy import array, concatenate
31 | from scipy import signal
32 | 
33 | import matplotlib.pyplot as plot
34 | 
35 | # == App Common Logics ==
36 | USE_FEATURE = set([])
37 | FEAT_DEBUG = "--debug"
38 | FEAT_PROGRESS = "--use-progress"
39 | FEAT_SHARP = "--use-sharp"
40 | NOT_COMMON_PUNTUATION = "#$%&\\()*+-/:;<=>@[]^_`{|}" + "—»™€°"
41 | 
42 | feats = USE_FEATURE.__contains__
43 | 
44 | def printDebug(*args, **kwargs):
45 |   if feats(FEAT_DEBUG): print(*args, **kwargs, file=stderr)
46 | 
47 | def stripAll(symbols, text) -> str:
48 |   return text.translate({ord(c):"" for c in symbols})
49 | 
50 | def smooth(a, window_size, window) -> array:
51 |   printDebug(f"smooth [...x{len(a)}], {window_size} {window}")
52 |   return orig_smooth(a, window_size, window)
53 | 
54 | def cvInGrayRange(img: UMat, start: int, end: int) -> UMat:
55 |   return cv2.inRange(img, (start,start,start), (end,end,end))
56 | 
57 | class AsProgress(Reducer):
58 |   def __init__(self, cap: VideoCapture, crop):
59 |     n_frame = cv2VideoProps(cap)[0]
60 |     self.progress = ProgressBar(maxval=n_frame).start()
61 |   def accept(self, index):
62 |     self.progress.update(index)
63 |   def finish(self):
64 |     self.progress.finish()
65 | 
66 | # == Main Algorithm ==
67 | class ExtractSubtitles(BasicCvProcess):
68 |   '''
69 |   Operation of extracting video subtitle area as text,
70 |   - configurable: `cropUMat`, `postprocessUMat`, `onFrameList`, `subtitleShouldReplace`, `postprocessSubtitle`
71 |   - workflow: `runOn`, `solveFrameDifferences`, `findPeaks`, `onFrameList`, `ocrWithLocalMaxima`
72 |   '''
73 |   WIN_LAST_IMAGE = "Last Image"
74 |   WIN_LAST_FRAME = "Last Frame (processed image)"
75 |   WIN_SUBTITLE_RECT = "Subtitle Rect"
76 | 
77 |   def __init__(self, lang: str, is_crop_debug: bool, diff_save_thres: float, window, window_size, chunk_size, path_frames):
78 |     '''
79 |     - lang: language for Tesseract OCR
80 |     - is_crop_debug: show OpenCV capture GUI when processing
81 |     - diff_save_thres: save threshold for differential frame dropper
82 |     - window: windowing kind
83 |     - window_size: window size for numpy algorithms
84 |     - chunk_size: processing chunk size for `ocrWithLocalMaxima()`
85 |     - path_frames: temporary path for frame files
86 |     '''
87 |     self.lang, self.is_crop_debug, self.diff_save_thres = lang, is_crop_debug, diff_save_thres
88 |     super().__init__(window, window_size, chunk_size, path_frames)
89 | 
90 |   def cropUMat(self, mat: UMat, crop: List[Rect], index: int) -> UMat:
91 |     if crop == None: return mat
92 |     cropped_img = crop[0].sliceUMat(mat)
93 |     if self.is_crop_debug:
94 |       cv2.imshow(ExtractSubtitles.WIN_SUBTITLE_RECT, cropped_img)
95 |       cv2WaitKey()
96 |     return cropped_img
97 | 
98 |   def postprocessUMat(self, mat, 
index: int) -> UMat: return mat 99 | 100 | def recognizeText(self, frame: Frame) -> str: 101 | return image_to_string(frame.img, self.lang) 102 | 103 | #v frame & subtitles 104 | def onFrameList(self, frames): 105 | if self.diff_save_thres != None: self.writeFramesThresholded(frames) 106 | 107 | def subtitleShouldReplace(self, a, b) -> bool: 108 | return b != a and b.count("\n") == 0 and stringSimilarity(a, b) > (1/4) 109 | 110 | def postprocessSubtitle(self, text) -> str: 111 | return stripAll(NOT_COMMON_PUNTUATION, text) 112 | 113 | def solveFrameDifferences(self, cap: VideoCapture, crop: List[Rect], fold) -> Iterator[Frame]: 114 | require(cap.isOpened(), "failed to open capture") 115 | postprocess = lambda mat, index: self.postprocessUMat(self.cropUMat(mat, crop, index), index) 116 | if self.is_crop_debug: 117 | cv2NormalWin(ExtractSubtitles.WIN_LAST_IMAGE) 118 | cv2NormalWin(ExtractSubtitles.WIN_LAST_FRAME) 119 | reducer = fold(cap, crop) 120 | 121 | index = 0 122 | prev_frame, curr_frame = None, None 123 | unfinished, img = cap.read() 124 | prev_frame = postprocess(img, 0) #< initial (prev == curr) 125 | def go(i): 126 | nonlocal index 127 | index=i; cap.set(cv2.CAP_PROP_POS_FRAMES,i) 128 | while unfinished: 129 | curr_frame = postprocess(img, index) 130 | if self.is_crop_debug: 131 | cv2.imshow(ExtractSubtitles.WIN_LAST_IMAGE, img) 132 | cv2.imshow(ExtractSubtitles.WIN_LAST_FRAME, curr_frame) #< must have single title, to animate 133 | k = cv2WaitKey() 134 | if k == 'q': break 135 | elif k=='`': breakpoint()#use go(i) to change pos 136 | if curr_frame is not None: # and prev_frame is not None 137 | try: 138 | diff = cv2.absdiff(curr_frame, prev_frame) #< main algorithm goes here 139 | yield Frame(index, curr_frame, np.sum(diff)) 140 | except cv2.error: pass 141 | prev_frame = curr_frame 142 | unfinished, img = cap.read() 143 | index = index + 1 144 | reducer.accept(index) 145 | reducer.finish() 146 | 147 | def postprocessDifferences(self, a: array) -> array: return smooth(a, self.window_size, self.window) 148 | def findPeaks(self, a: array) -> array: return np.asarray(signal.argrelextrema(a, np.greater))[0] #< argrelextrema(_) always (x,) 149 | 150 | def ocrWithLocalMaxima(self, frames, reducer) -> Tuple[array, array]: 151 | ''' 152 | - frames: chunked processing using window, reducing memory usage 153 | - reducer: accept (frame, subtitle) 154 | ''' 155 | frame_list, frame_diffs = collect2(lambda it: (it, it.value), frames) 156 | self.onFrameList(frame_list) 157 | 158 | diff_array = self.postprocessDifferences(array(frame_diffs)) 159 | valid_indices = self.findPeaks(diff_array) 160 | for i in valid_indices: 161 | frame = frame_list[i] 162 | if self.is_crop_debug: 163 | cv2.imshow(ExtractSubtitles.WIN_SUBTITLE_RECT, frame.img) 164 | cv2WaitKey() 165 | subtitle = self.recognizeText(frame) if self.lang!="index" else f"{i}" 166 | reducer.accept(frame, subtitle) 167 | reducer.finish() 168 | return (diff_array, valid_indices) 169 | 170 | class DefaultOcrFold(Reducer): 171 | def __init__(self, ctx, name, on_new_subtitle = print): 172 | self.ctx = ctx; self.on_new_subtitle = on_new_subtitle 173 | self.path = self.ctx.path_frames/name 174 | mkdirIfNotExists(self.path) 175 | self.files = [(self.path/f"{group}.txt").open("a+") for group in ["timeline", "loser"]] 176 | self.out_timeline, self.out_lose_subtitle = self.files 177 | self.last_subtitle = "" 178 | self.frame_index = 0 179 | def accept(self, frame, subtitle): 180 | self.out_timeline.write(f"{frame.no} {dumps(subtitle, 
ensure_ascii=False)}\n")
181 |       if self.ctx.subtitleShouldReplace(self.last_subtitle, subtitle): #< check for repeated subtitles
182 |         self.last_subtitle = subtitle #v also clean-up new subtitle
183 |         self.on_new_subtitle(frame.no, self.ctx.postprocessSubtitle(subtitle))
184 |       else:
185 |         self.out_lose_subtitle.write(f"{frame.no} {subtitle}\n")
186 |       if self.ctx.is_crop_debug:
187 |         cv2.imwrite(str(self.path/f"subtitle_{self.frame_index}.png"), frame.img)
188 |       self.frame_index += 1
189 |     def finish(self): #< in (single chunk) OCR
190 |       for f in self.files: f.flush()
191 |     def finishAll(self):
192 |       for f in self.files: f.close()
193 | 
194 |   def runOn(self, cap: VideoCapture, crop: List[Rect], fold = DefaultOcrFold, name = "default") -> Tuple[array, array]:
195 |     '''
196 |     - cap: video input
197 |     - crop: Rect area for lyric graphics
198 |     - fold: init (self, name)
199 |     '''
200 |     frames = self.solveFrameDifferences(cap, crop, AsProgress if feats(FEAT_PROGRESS) else AsNoOp)
201 |     reducer = fold(self, name)
202 |     processChunk = lambda it: self.ocrWithLocalMaxima(it, reducer)
203 |     diff_array_parts = map(processChunk, chunked(self.chunk_size, frames))
204 |     def concatResults(a, b) -> Tuple[array, array]:
205 |       a0, a1 = a
206 |       b0, b1 = b
207 |       ab0 = concatenate(array([a0, b0],dtype=object))
208 |       ab1 = concatenate(array([a1, b1+len(a0)],dtype=object))
209 |       return (ab0, ab1)
210 |     (diff_array, indices) = reduce(concatResults, diff_array_parts)
211 | 
212 |     reducer.finishAll()
213 |     cv2.destroyAllWindows()
214 |     return (diff_array, indices)
215 | 
216 |   def writeFramesThresholded(self, frames):
217 |     for (a, b) in zipWithNext(frames):
218 |       if b.value == 0: continue #< what if no motion between (last-1)&last ?
219 |       k_change = relativeChange(float(a.value), float(b.value))
220 |       if k_change < self.diff_save_thres: continue
221 |       printDebug(f"[{b.no}]({k_change}) prev: {a.value}, curr: {b.value}")
222 |       cv2.imwrite(self.frameFilepath(a), a.img)
223 | 
224 |   def drawPlot(self, diff_array, indices):
225 |     fig_diff = plot.figure(figsize=(40, 20))
226 |     plot.xlabel("Frame.no")
227 |     plot.ylabel("differences")
228 |     plot.locator_params(nbins=100)
229 |     plot.stem(diff_array, linefmt=":", use_line_collection=True)
230 |     plot.stem(indices, [diff_array[i] for i in indices], use_line_collection=True)
231 |     return fig_diff
232 | 
233 | 
234 | # == Main ==
235 | def makeArgumentParser():
236 |   app = ArgumentParser(
237 |     prog="extract_subtitles",
238 |     description="Extract subtitles using OpenCV / Tesseract OCR with frame difference algorithm")
239 | 
240 |   apg = app.add_argument_group("basic workflow")
241 |   apg.add_argument("video", nargs="+", type=FileType("r"), help="source file to extract from")
242 |   apg.add_argument("-crop", metavar="frame(x,y)[w,h]",
243 |     type=PatternType(r"(\d+)\((\d+),(\d+)\)\[(\d+),(\d+)\]", toMapper(int)),
244 |     default=None, help="crop out subtitles area, improve recognition accuracy")
245 |   apg.add_argument("-filter-code", type=str, default="it", help="(it: cv2.UMat) pipe function")
246 |   apg.add_argument("-lang", type=str, default="eng", help="OCR language for Tesseract `tesseract --list-langs`")
247 |   apg.add_argument("-save-thres", metavar="x.x", type=float, default=None, help="store frames whose relative change exceeds this fixed threshold")
248 | 
249 |   apg1 = app.add_argument_group("misc settings")
250 |   apg1.add_argument("--crop-debug", action="store_true", help="show OpenCV GUI when processing")
251 |   apg1.add_argument("--draw-plot", action="store_true", help="draw difference plot for statistics")
252 |   apg1.add_argument(FEAT_SHARP, action="store_true", help="use non-smooth differential (improves timeline accuracy, slower)")
253 |   apg1.add_argument(FEAT_PROGRESS, action="store_true", help="show progress bar")
254 |   apg1.add_argument(FEAT_DEBUG, action="store_true", help="print debug info")
255 |   apg1.add_argument("--only-images", action="store_true", help="use frame images from --crop-debug as input")
256 |   BasicCvProcess.registerArguments(apg1)
257 |   return app
258 | 
259 | def mkdirIfNotExists(self: Path):
260 |   if not self.exists(): self.mkdir()
261 | 
262 | def makeExtractor(cfg: Namespace, cls_extract=ExtractSubtitles) -> ExtractSubtitles:
263 |   lang, crop, crop_debug, save_thres, window, window_size, chunk_size, frames_dir = cfg.lang, cfg.crop, cfg.crop_debug, cfg.save_thres, cfg.window, cfg.window_size, cfg.chunk_size, cfg.frames_dir
264 |   printAttributes(
265 |     subtitle_language=lang,
266 |     crop=crop,
267 |     save_threshold=save_thres,
268 |     filter_window=window,
269 |     filter_window_size=window_size,
270 |     process_chunk_size=chunk_size,
271 |     frame_directory=frames_dir
272 |   )
273 |   if cfg.use_sharp: #< assign extra config
274 |     USE_FEATURE.add(FEAT_SHARP)
275 |   if cfg.use_progress:
276 |     USE_FEATURE.add(FEAT_PROGRESS)
277 |   if cfg.debug:
278 |     USE_FEATURE.add(FEAT_DEBUG)
279 | 
280 |   extractor = cls_extract(lang, crop_debug, save_thres,
281 |     window, window_size, chunk_size, also(mkdirIfNotExists, Path(frames_dir)) )
282 |   return extractor
283 | 
284 | class EvalFilterExtractSubtitle(ExtractSubtitles):
285 |   def __init__(self, *args, filter_code = "it"):
286 |     ''' filter_code: Python expression over `(it: cv2.UMat)` that yields a `cv2.UMat` '''
287 |     super().__init__(*args)
288 |     self.mat_filter = eval(compile(f"lambda it, i: {filter_code}", "", "eval"))
289 |     self.is_sharp = feats(FEAT_SHARP)
290 |   def postprocessUMat(self, mat, index):
291 |     return self.mat_filter(mat, index)
292 |   def postprocessDifferences(self, a) -> array:
293 |     return (a if self.is_sharp else super().postprocessDifferences(a))
294 | 
295 | class CropEvalFilterExtractSubtitle(EvalFilterExtractSubtitle):
296 |   def cropUMat(self, mat, crop, index) -> UMat:
297 |     cropped_img = crop[index].sliceUMat(mat)
298 |     if self.is_crop_debug:
299 |       cv2.imshow(ExtractSubtitles.WIN_SUBTITLE_RECT, cropped_img)
300 |       cv2WaitKey()
301 |     return cropped_img
302 | 
303 | # == Entry ==
304 | def main(args):
305 |   app = makeArgumentParser()
306 |   cfg = app.parse_args(args)
307 |   cls_extract = lambda *args: (EvalFilterExtractSubtitle if cfg.crop == None or len(cfg.crop) <= 1 else CropEvalFilterExtractSubtitle) (*args, filter_code=cfg.filter_code)
308 |   extractor = makeExtractor(cfg, cls_extract=cls_extract)
309 | 
310 |   def drawPlot(diff_array, indices):
311 |     if not cfg.draw_plot: return
312 |     fig_diff = extractor.drawPlot(diff_array, indices)
313 |     print(indices)
314 |     plot.title(f"Filtered differential sum for {video_name}")
315 |     plot.show()
316 |     fig_diff.savefig(cfg.frames_dir/f"plot_{video_name}.png")
317 | 
318 |   def makeCrops(n_frames):
319 |     #v [(t, x,y, w,h), ...]
320 |     key = lambda it: it[0]; makeRect = lambda it: Rect(*it[1:])
321 |     crops = let(lambda t: [makeRect(t[0])] if len(t) == 1 else expandRangeStartList(n_frames, t, key=key, value=makeRect), cfg.crop)
322 |     if crops != None: require(crops[0] != None, "first crop area must start at frame 0")
323 |     #^ only when multi-crop enabled
324 |     return crops
325 | 
326 |   def readInt(s): return int(findall(r"(\d+)", s)[0])
327 | 
328 |   pathes = map(lambda it: it.name, cfg.video)
329 |   if cfg.only_images:
330 |     extractor.postprocessDifferences = lambda diffs: diffs
331 |     extractor.findPeaks = lambda a: range(0, len(a)) #< required for smooth & peak estim. bypass
332 |     frames = sorted([Frame(readInt(path), cv2.imread(path), 0) for path in pathes])
333 |     print(array([it.no for it in frames])) #< NOTE: I don't know if len(sorted(a)) gets shorter second time access
334 | 
335 |     reducer = ExtractSubtitles.DefaultOcrFold(extractor, "only_images")
336 |     _, indices = extractor.ocrWithLocalMaxima(frames, reducer)
337 |     reducer.finishAll()
338 |     cv2.destroyAllWindows()
339 | 
340 |   for path in pathes:
341 |     video_name = Path(path).name
342 |     printAttributes(video_path=path)
343 |     print("Extracting key frames...")
344 | 
345 |     capture = VideoCapture(path)
346 |     n_frames, fps, w, h = cv2VideoProps(capture)
347 |     printAttributes(video_playback=(n_frames, fps), video_dimens=(w, h))
348 | 
349 |     (diff_array, indices) = extractor.runOn(capture, makeCrops(n_frames), name=video_name)
350 |     capture.release()
351 |     drawPlot(diff_array, indices)
352 | 
353 | if __name__ == "__main__": main(argv[1:]) #< no program name
354 | 
--------------------------------------------------------------------------------
/gui_crop_select.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import Optional, Tuple, Iterator, cast
5 | 
6 | from argparse import ArgumentParser, FileType
7 | 
8 | from cv2 import VideoCapture, UMat, imshow, rectangle
9 | from cv2 import namedWindow, destroyWindow, WINDOW_NORMAL
10 | from cv2 import setMouseCallback, waitKey
11 | from cv2 import EVENT_LBUTTONDOWN, EVENT_LBUTTONUP
12 | from cv2 import CAP_PROP_POS_FRAMES
13 | 
14 | def cv2WaitKey(block_ms = 1) -> str:
15 |   return chr(waitKey(block_ms) & 0xFF)
16 | 
17 | def rectLtrd2Xywh(lt: Tuple[int, int], rd: Tuple[int, int]) -> Tuple[int, int, int, int]:
18 |   x1, y1 = lt
19 |   x2, y2 = rd
20 |   return (x1, y1, x2-x1, y2-y1)
21 | 
22 | def cv2Crop(mat: UMat, ltrd) -> UMat:
23 |   x,y, w,h = rectLtrd2Xywh(*ltrd)
24 |   return mat[y:y+h, x:x+w]
25 | 
26 | # == App ==
27 | def guiSelectionUMat(mat: UMat, title="Rect Selection (r:replot; c:OK)", box_color=(0xFF, 0x00, 0x00), thickness=3) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
28 |   ''' (left_top, right_down) '''
29 |   p_lt, p_rd = None, None
30 |   shot = mat.copy()
31 |   def onMouseEvent(event, x, y, flags, param):
32 |     nonlocal p_lt, p_rd, shot
33 |     if event == EVENT_LBUTTONDOWN:
34 |       p_lt = (x, y)
35 |     elif event == EVENT_LBUTTONUP:
36 |       p_rd = (x, y)
37 |       rectangle(shot, p_lt, p_rd, box_color, thickness)
38 |       imshow(title, shot)
39 |   namedWindow(title, WINDOW_NORMAL)
40 |   setMouseCallback(title, onMouseEvent)
41 |   imshow(title, shot)
42 |   while True:
43 |     key = cv2WaitKey(0)
44 |     if key == 'c' and (p_lt != None and p_rd != None):
45 |       destroyWindow(title)
46 |       return cast(Tuple, (p_lt, p_rd))
47 |     elif key == 'r':
48 |       shot = mat.copy()
49 |       imshow(title, shot)
50 |     elif key == 'q':
51 |       return None
52 | 
53 | def 
selectCropRects(cap: VideoCapture, title = "Video (c:OK; q:finished, <:-; >:=)", title_preview = "Preview", d_seek=5) -> Iterator[Tuple[int, Tuple[int, int, int, int]]]: 54 | ''' position&size (x, y, w, h) ''' 55 | index = 0 56 | ltrd = None 57 | def seek(n): 58 | nonlocal index 59 | index += n 60 | cap.set(CAP_PROP_POS_FRAMES, index) 61 | def handleSeek(key): 62 | if key == '-': seek(-d_seek) 63 | elif key == '=': seek(+d_seek) 64 | def handleSelect(): 65 | nonlocal ltrd 66 | area = guiSelectionUMat(img) 67 | if area != None: 68 | ltrd = area 69 | return (index, rectLtrd2Xywh(*ltrd)) 70 | frame_ops = [lambda: seek(-1), lambda: seek(+1)] 71 | 72 | unfinished, img = cap.read() 73 | while unfinished: 74 | imshow(title, img) 75 | if ltrd != None: imshow(title_preview, cv2Crop(img, ltrd)) 76 | key = cv2WaitKey() 77 | if key == 'c': 78 | select = handleSelect() 79 | if select != None: yield select 80 | elif key == 'q': break 81 | 82 | elif key in '-=': handleSeek(key) 83 | elif key == ' ': 84 | while True: 85 | key1 = cv2WaitKey(0) 86 | if key1 == ' ': break 87 | elif key1 == 'c': 88 | select = handleSelect() 89 | if select != None: yield select 90 | elif key1 in "-=89": 91 | handleSeek(key1) 92 | miniseek = ord(key1) - ord('8') #[89] to mimiseek 93 | if miniseek in range(len(frame_ops)): frame_ops[miniseek]() 94 | unfinished, img = cap.read() 95 | imshow(title, img) 96 | unfinished, img = cap.read() 97 | index += 1 98 | 99 | app = ArgumentParser("gui_crop_select", description="Interactive video crop rect selection") 100 | app.add_argument("video", nargs="+", type=FileType("r"), help="video paths") 101 | 102 | if __name__ == "__main__": 103 | cfg = app.parse_args() 104 | for path in map(lambda it: it.name, cfg.video): 105 | cap = VideoCapture(path) 106 | crops = selectCropRects(cap) 107 | for i, c in crops: 108 | (x,y, w,h) = c 109 | print(f"{i}({x},{y})[{w},{h}]") 110 | cap.release() 111 | -------------------------------------------------------------------------------- /libs/__package__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/duangsuse-valid-projects/extract-subtitles/af7f13e2a2f6abf6f70014b0927c3eb1ea333214/libs/__package__.py -------------------------------------------------------------------------------- /libs/cv_utils.py: -------------------------------------------------------------------------------- 1 | from typing import cast, Tuple 2 | from pathlib import Path 3 | 4 | import cv2 5 | from cv2 import UMat, VideoCapture 6 | from cv2 import CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT 7 | 8 | import numpy as np 9 | from numpy import array, convolve 10 | from strsimpy.normalized_levenshtein import NormalizedLevenshtein 11 | 12 | from libs.fun_utils import require 13 | 14 | _levenshtein = NormalizedLevenshtein() 15 | def stringSimilarity(a: str, b: str) -> float: 16 | return _levenshtein.distance(a, b) 17 | 18 | def relativeChange(a: float, b: float) -> float: 19 | return (b - a) / max(a, b) 20 | 21 | # == OpenCV utils == 22 | def cv2NormalWin(title): 23 | cv2.namedWindow(title, cv2.WINDOW_NORMAL) 24 | 25 | def cv2WaitKey(delay_ms = 1) -> str: 26 | return chr(cv2.waitKey(delay_ms) & 0xFF) 27 | 28 | def cv2VideoProps(cap: VideoCapture) -> Tuple[int, int, int, int]: 29 | ''' (count, fps, width, height) ''' 30 | props = (CAP_PROP_FRAME_COUNT, CAP_PROP_FPS, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT) 31 | return cast(Tuple, tuple(map(int, map(cap.get, props)))) 32 | 33 | class Rect: 
34 | def __init__(self, x,y, w,h): 35 | ''' (pad_left, pad_top, width, height) ''' 36 | self.xywh = (x,y, w,h) 37 | self.form_range = (y,y+h, x,x+w) 38 | def sliceUMat(self, mat: UMat) -> UMat: 39 | (y, y_end, x, x_end) = self.form_range 40 | return mat[y:y_end, x:x_end] 41 | 42 | class Frame: 43 | ''' Class to hold information about each frame ''' 44 | def __init__(self, no, img, value): 45 | self.no, self.img, self.value = no, img, value 46 | def __repr__(self): return f"Frame({self.no}, {self.value})" 47 | def __eq__(self, other): return self.no == other.no 48 | def __hash__(self): return hash(self.no) 49 | def __lt__(self, other): return self.no < other.no 50 | 51 | 52 | smooth_supported_windows = ["flat", "hanning", "hamming", "bartlett", "blackman"] 53 | def smooth(a: array, window_size: int, window = "hanning") -> array: 54 | supported_windows = smooth_supported_windows 55 | if window_size < 3: return a 56 | require(a.ndim == 1, "smooth only accepts 1 dimension arrays") 57 | require(a.size >= window_size, "input vector size must >= window size") 58 | require(window in supported_windows, f"window must in {supported_windows}") 59 | 60 | s = np.r_[2 * a[0] - a[window_size:1:-1], 61 | a, 2 * a[-1] - a[-1:-window_size:-1]] 62 | w = getattr(np, window)(window_size) if window != "flat" else np.ones(window_size, "d") 63 | y = convolve(w / w.sum(), s, mode="same") 64 | return y[window_size -1 : -window_size +1] 65 | 66 | def cvScale(mat: UMat, k_x, k_y) -> UMat: 67 | return cv2.resize(mat, None, fx=k_x, fy=k_y, interpolation=cv2.INTER_AREA) 68 | 69 | def cvGrayscale(mat: UMat) -> UMat: 70 | return cv2.cvtColor(mat, cv2.COLOR_BGR2GRAY) 71 | 72 | def cvBlur(mat: UMat, n = 9) -> UMat: 73 | return cv2.GaussianBlur(mat, (n, n), 0.0) 74 | 75 | class BasicCvProcess: 76 | ''' Helper class for simple CV programs (window+size, chunk_size, path_frames) ''' 77 | def __init__(self, window: str, window_size: int, chunk_size: int, path_frames: Path): 78 | require(chunk_size > window_size, f"chunk size({chunk_size}) must fill(≥) window({window_size})") 79 | require(path_frames.is_dir(), f"{path_frames} must be dir") 80 | self.window, self.window_size, self.chunk_size, self.path_frames = window, window_size, chunk_size, path_frames 81 | @staticmethod 82 | def registerArguments(ap): 83 | ap.add_argument("--window", type=str, default="hamming", help=f"filter window, one of {smooth_supported_windows}") 84 | ap.add_argument("--window-size", type=int, default=30, help="matrix filtering window size") 85 | ap.add_argument("--chunk-size", type=int, default=300, help="processing frame chunk size") 86 | ap.add_argument("--frames-dir", type=Path, default=Path("frames/"), help="directory to store the processed frames") 87 | def frameFilepath(self, it: Frame) -> str: 88 | return str(self.path_frames/f"frame_{it.no}.jpg") 89 | -------------------------------------------------------------------------------- /libs/fun_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Callable, TypeVar, Generic, Any 2 | from typing import Iterable, Iterator, List, Tuple, Dict 3 | 4 | from re import findall 5 | from itertools import chain, repeat, islice 6 | from sys import stderr 7 | 8 | T = TypeVar("T") 9 | R = TypeVar("R", contravariant=True) 10 | K = TypeVar("K") 11 | V = TypeVar("V") 12 | 13 | # == Normal function utils == 14 | identity = lambda x: x 15 | noOp = lambda x: () 16 | 17 | def let(transform: Callable[[T], R], x: T) -> R: 18 | return transform(x) if x != None else 
None 19 | 20 | def also(op: Callable[[T], Any], x: T) -> T: 21 | op(x) 22 | return x 23 | 24 | def require(p: bool, message: str): 25 | if not p: raise ValueError(message) 26 | 27 | def toMapper(transform: Callable[[T], R]) -> Callable[[Iterable[T]], List[R]]: 28 | return lambda xs: [transform(x) for x in xs] 29 | 30 | def zipWithNext(xs: List[T]) -> Iterator[Tuple[T, T]]: 31 | require(len(xs) >= 2, f"len {len(xs)} is too short (< 2)") 32 | for i in range(1, len(xs)): 33 | yield (xs[i-1], xs[i]) 34 | 35 | def chunked(n: int, xs: Iterator[T]) -> Iterator[T]: 36 | while True: 37 | try: #< must return when inner gen finished 38 | first = next(xs) 39 | except StopIteration: return 40 | chunk = islice(xs, n) 41 | yield chain((first,) , chunk) 42 | 43 | def collect2(selector2: Callable[[T], Tuple[R, R]], xs: Iterable[T]) -> Tuple[List[R], List[R]]: 44 | bs, cs = [], [] 45 | for x in xs: 46 | b, c = selector2(x) 47 | bs.append(b); cs.append(c) 48 | return (bs, cs) 49 | 50 | def expandRangeStartList(size: int, entries: T, key: Callable[[T], int] = lambda it: it[0], value: Callable[[T], V] = lambda it: it[1]) -> List[V]: 51 | sorted_entries = sorted(entries, key=key) 52 | items = list(repeat(None, size)) 53 | def assignRange(start, stop, value): 54 | items[start:stop] = repeat(value, stop - start) 55 | for (a, b) in zipWithNext(sorted_entries): 56 | assignRange(key(a), key(b), value(a)) 57 | last_item = sorted_entries[-1] 58 | assignRange(key(last_item), size, value(last_item)) 59 | return items 60 | 61 | class PatternType: 62 | def __init__(self, regex, transform = identity): 63 | self.regex, self.transform = regex, transform 64 | def __repr__(self): return f"PatternType({self.regex})" 65 | def __call__(self, text): 66 | groups = findall(self.regex, text) 67 | return list(map(self.transform, groups)) 68 | 69 | 70 | def snakeSplit(text): return text.strip().split("_") 71 | def titleCased(texts, sep = " "): return sep.join(map(str.capitalize, texts)) 72 | 73 | def printAttributes(fmt = lambda k, v: f"[{titleCased(snakeSplit(k))}] {v}", sep = "\n", file = stderr, **kwargs): 74 | entries = [fmt(k, v) for (k, v) in kwargs.items()] 75 | print(sep.join(entries), file=file) 76 | 77 | class Reducer(Generic[T, R]): 78 | def __init__(self): pass 79 | def accept(self, value: T): pass 80 | def finish(self) -> R: pass 81 | def reduce(self, xs: Iterable[T]) -> R: 82 | for x in xs: 83 | self.accept(x) 84 | return self.finish() 85 | 86 | class EffectReducer(Reducer[T, R]): 87 | ''' `Reducer` defined using makeBase/onAccept ''' 88 | def __init__(self): 89 | self._base = self._makeBase() 90 | @classmethod #< used in ctor 91 | def _makeBase(cls): pass 92 | def _onAccept(self, base, value): pass 93 | def accept(self, value): 94 | self._onAccept(self._base, value) 95 | def finish(self): 96 | return self._base 97 | 98 | class AsNoOp(Reducer[Any, None]): 99 | def __init__(self, *args): pass 100 | 101 | class AsDict(EffectReducer[Tuple[K, V], Dict[K, V]]): 102 | @classmethod 103 | def _makeBase(cls): return dict() 104 | def _onAccept(self, base, value): 105 | (k, v) = value 106 | base[k] = v 107 | -------------------------------------------------------------------------------- /libs/lrc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from typing import Tuple 4 | from re import findall 5 | 6 | SEC_MS = 1000 7 | MIN_MS = 60*SEC_MS 8 | HOUR_MS = 60*MIN_MS 9 | 10 | def time_just(v: float, n = 2, pad = '0') -> str: 11 | text = str(int(v)) 12 | return 
text.rjust(n, pad) 13 | 14 | def millis2MinSecMs(ms) -> Tuple[int, int, int]: 15 | mins, r = divmod(ms, MIN_MS) 16 | secs, r = divmod(r, SEC_MS) 17 | return (mins, secs, r) 18 | 19 | def millis2HourMinSecMs(ms) -> Tuple[int, int, int, int]: 20 | hrs, r = divmod(ms, HOUR_MS) 21 | mins, r = divmod(r, MIN_MS) 22 | secs, r = divmod(r, SEC_MS) 23 | return (hrs, mins, secs, r) 24 | 25 | def makeConvertorFps2Ms(fps): 26 | ''' creates a frame-no to millis convertor ''' 27 | return lambda no: (float(no) / fps) * SEC_MS 28 | 29 | 30 | def millis2LrcTime(ms) -> str: 31 | mins, secs, r = millis2MinSecMs(ms) 32 | return f"{time_just(mins)}:{time_just(secs)}.{int(r)}" 33 | 34 | def dumpsLrc(ms, text) -> str: 35 | return f"[{millis2LrcTime(ms)}] {text}" 36 | 37 | def loadsLrc(text) -> Tuple[int, str]: 38 | mm, ss, rrr, lyric = findall(r"^\[(\d{2}):(\d{2}).(\d+)\] ?(.+)$", text)[0] 39 | mins, secs, r = map(int, (mm, ss, rrr) ) 40 | return (mins*MIN_MS + secs*SEC_MS + r, lyric) 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python>=4.2 2 | numpy>=1.0 3 | scipy>=1 4 | pytesseract>=0.3 5 | strsimpy>=0.1 6 | matplotlib>=3 7 | 8 | progressbar>=0.3 9 | -------------------------------------------------------------------------------- /timeline_ops.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | from sys import argv, stdin, stderr 5 | from re import findall 6 | from json import dumps, loads 7 | 8 | from libs.fun_utils import zipWithNext 9 | from libs.cv_utils import stringSimilarity 10 | from libs.lrc import makeConvertorFps2Ms, dumpsLrc, millis2HourMinSecMs, time_just 11 | 12 | from os import environ 13 | def env(name, transform, default): return default if name not in environ else transform(environ[name]) 14 | 15 | pipeSubtitle = env("PIPE", lambda code: eval(f"lambda it: {code}"), lambda it: it) 16 | 17 | class Record: 18 | ''' Value record on the time line ''' 19 | def __init__(self, start: int, end: int, value): 20 | self.start, self.end, self.value = start, end, value 21 | def __str__(self): 22 | return f"{self.start}-{self.end} {dumps(self.value, ensure_ascii=False)}" 23 | @staticmethod 24 | def loads(line): 25 | start, end, text = findall(r"^(\d+)-(\d+) (.*)$", line)[0] 26 | return Record(int(start), int(end), loads(text)) 27 | def mapTime(self, transform): 28 | return Record(transform(self.start), transform(self.end), self.value) 29 | 30 | class Timeline: 31 | def __init__(self, time: int, value): 32 | self.time, self.value = time, value 33 | def __str__(self): 34 | return f"{self.time} {dumps(self.value, ensure_ascii=False)}" 35 | @staticmethod 36 | def loads(line): 37 | time, text = findall(r"^(\d+) (.*)$", line)[0] 38 | return [int(time), loads(text)] 39 | 40 | #^ Two data&representations: Record(+end) and timeline(time, text) 41 | 42 | def openTimeline(path): 43 | return [Timeline.loads(ln) for ln in open(path, "r").readlines()] 44 | 45 | def mergeDebug(path): 46 | for (a, b) in zipWithNext(openTimeline(path)): 47 | ta, sa = a; tb, sb = b 48 | sa1, sb1 = map(pipeSubtitle, (sa, sb)) 49 | v = stringSimilarity(sa1, sb1) 50 | print(f"{ta}-{tb} {str(v)[0:4].ljust(4, '0')} {sa1} | {sb1}") 51 | 52 | def merge(path, strsim_bound_max, consume = print): 53 | bound_max = float(strsim_bound_max) 54 | (last_text, start, end) = (pipeSubtitle(""), 0, 0) 55 | onConsume = 
lambda: consume(Record(start, end, last_text))
56 |   for (time, text) in openTimeline(path):
57 |     text1 = pipeSubtitle(text)
58 |     if stringSimilarity(last_text, text1) < bound_max:# or last_text.isspace():
59 |       last_text = text1
60 |       end = time #< renew end
61 |     else:
62 |       onConsume()
63 |       (last_text, start, end) = (text1, time, time)
64 |   onConsume()
65 | 
66 | lines = lambda s: iter(s.readline, "")
67 | 
68 | def stdinSimplify():
69 |   for line in lines(stdin):
70 |     rec = Record.loads(line)
71 |     print(Timeline(rec.start, rec.value))
72 | 
73 | #v Lyric and formats
74 | def millis2SrtTime(ms, ms_sep = ",") -> str:
75 |   hrs, mins, secs, r = millis2HourMinSecMs(ms)
76 |   return f"{time_just(hrs)}:{time_just(mins)}:{time_just(secs)}{ms_sep}{time_just(r, 3)}"
77 | 
78 | def makeLyricFormater(fmt):
79 |   if fmt == "lrc": return lambda rec: dumpsLrc(rec.start, rec.value)
80 |   elif fmt == "srt":
81 |     fTime = millis2SrtTime
82 |     index = 1
83 |     def _nextRecord(rec):
84 |       nonlocal index
85 |       line = f"{index}\n{fTime(rec.start)} --> {fTime(rec.end)}\n{rec.value}\n"
86 |       index += 1
87 |       return line
88 |     return _nextRecord
89 |   else: raise ValueError(f"unknown format {fmt}")
90 | 
91 | def stdinToLRC(fps, fmt = "lrc"):
92 |   ms = makeConvertorFps2Ms(float(fps))
93 |   accept = makeLyricFormater(fmt)
94 |   for line in lines(stdin):
95 |     rec = Record.loads(line).mapTime(ms)
96 |     lrc = accept(rec)
97 |     print(lrc)
98 | 
99 | handler = { "merge-debug": mergeDebug, "merge": merge, "simplify": stdinSimplify, "to-lrc": stdinToLRC }
100 | 
101 | def main(args):
102 |   if len(args) == 0:
103 |     tl = "timeline_file"
104 |     print(f"Usage: merge-debug <{tl}> | merge <{tl}> <strsim_max> | simplify | to-lrc <fps> (srt)", file=stderr)
105 |     return
106 |   key_op = args[0]
107 |   handler[key_op](*args[1:])
108 | 
109 | if __name__ == "__main__": main(argv[1:])
--------------------------------------------------------------------------------
/vcat_subtitle_imgs.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | 
4 | from typing import NewType, Tuple, Dict
5 | from PIL.Image import Image
6 | from PIL import Image as Pillow
7 | 
8 | LTRD = NewType("LTRD", Tuple[int,int,int,int])
9 | 
10 | def imagePixels(img):
11 |   for y in range(0, img.height):
12 |     for x in range(0, img.width):
13 |       yield img.getpixel((x, y))
14 | 
15 | def channelHistogram(img) -> Tuple:
16 |   n_channels = Pillow.getmodebands(img.mode)
17 |   hist = img.histogram()
18 |   return tuple(hist[i:i+256] for i in range(0, n_channels*256, 256))
19 | 
20 | def count(xs): return sum(map(lambda _: 1, xs))
21 | 
22 | 
23 | def vConcateImages(path_imgs: Dict[str,Image]) -> Tuple[Image, Dict[str,LTRD]]:
24 |   width = max(map(lambda it: it.width, path_imgs.values()))
25 |   height = sum(map(lambda it: it.height, path_imgs.values()))
26 |   newImage = Pillow.new("RGBA", (width, height))
27 |   iter_img = iter(path_imgs.items())
28 |   areas = {}
29 |   y = 0
30 |   while y < height:
31 |     (path, img) = next(iter_img)
32 |     box = (0,y, img.width,y+img.height)
33 |     areas[path] = box
34 |     newImage.paste(img, box)
35 |     y += img.height
36 |   return (newImage, areas)
37 | 
38 | def saveCropImages(img: Image, areas: Dict[str,LTRD]):
39 |   for (k, v) in areas.items():
40 |     area = img.crop(v)
41 |     area.save(k)
42 |     onAreaWrote(k, area)
43 | 
44 | 
45 | from re import findall
46 | from json import dump, load
47 | def main(args):
48 |   if len(args) == 0:
49 |     print("Usage: dst src... 
| unpack dst\n(ON_AREA_WROTE=lambda path, img: )") 50 | return 51 | if args[0] == "unpack": 52 | path = args[1] 53 | areas = load(open(f"{path}.json", "r")) 54 | saveCropImages(Pillow.open(path), areas) 55 | else: 56 | dst = args[0] 57 | srcs = sorted(args[1:], key=lambda s: int(findall(r"(\d+)", s)[0])) 58 | print(f"{dst} << {srcs[:5]}..{srcs[-5:]} ({len(srcs)})") 59 | 60 | (img, areas) = vConcateImages({path: Pillow.open(path) for path in srcs}) 61 | img.save(dst); dump(areas, open(f"{dst}.json", "w+")) 62 | 63 | import os 64 | def defaultOnAreaWrote(path, img, mark_range=range(20, 1000), show=lambda n, r: n > r.stop): 65 | (r,_,_) = channelHistogram(img)[0:3] 66 | if r[0xFF] > 0: 67 | n_marks = count(filter(lambda it: it[0:3] == (0xFF,0,0), imagePixels(img))) 68 | if n_marks not in mark_range: return 69 | 70 | if show(n_marks, mark_range): img.show(title = f"Removed {path}") 71 | print(f"Removing {path} (redmarks {n_marks})") 72 | os.remove(path) 73 | 74 | onAreaWrote = eval("lambda path, img: " + (os.environ.get("ON_AREA_WROTE") or "defaultOnAreaWrote(path, img)")) 75 | 76 | from sys import argv 77 | if __name__ == "__main__": main(argv[1:]) 78 | --------------------------------------------------------------------------------
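
For orientation: `extract_subtitles.py` writes `frames/<video>/timeline.txt` as lines of `frame_no "subtitle"` (see `DefaultOcrFold.accept`), `timeline_ops.py merge` folds similar neighbouring lines into `start-end "subtitle"` records, and `libs/lrc.py` converts frame numbers into timestamps. Below is a minimal sketch of that last conversion, driven directly from Python instead of the shell pipeline in the README; the 24 fps rate and the `frames/a.flv/timeline.txt` path are placeholder assumptions:

```python
# Hedged sketch: timeline.txt -> LRC lines, using only helpers shown above.
from json import loads
from re import findall

from libs.lrc import makeConvertorFps2Ms, dumpsLrc

to_ms = makeConvertorFps2Ms(24.0)  # frame-no -> milliseconds (24 fps assumed)

with open("frames/a.flv/timeline.txt") as f:  # written by DefaultOcrFold
    for line in f:
        no, text = findall(r"^(\d+) (.*)$", line)[0]  # same regex as Timeline.loads
        print(dumpsLrc(to_ms(int(no)), loads(text)))  # e.g. [00:05.208] some text
```

Unlike `timeline_ops.py merge | to-lrc`, this skips the `stringSimilarity`-based merging of near-duplicate OCR lines, so repeated subtitles stay repeated.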
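
The detection core (`solveFrameDifferences` → `postprocessDifferences` → `findPeaks`) boils down to smoothing the per-frame difference sums and taking local maxima. A tiny self-contained demo of that numeric step on synthetic data (the series below is made up; real input comes from `cv2.absdiff` sums):

```python
# Hedged sketch: the smoothing + local-maxima step on fake frame differences.
import numpy as np
from scipy import signal

from libs.cv_utils import smooth

rng = np.random.default_rng(0)
diffs = rng.normal(10.0, 1.0, 160)  # background frame-to-frame noise
diffs[40] += 30; diffs[120] += 30   # two simulated subtitle changes

sm = smooth(diffs, 30, "hamming")   # same window kind/size defaults as BasicCvProcess
peaks = signal.argrelextrema(sm, np.greater)[0]
print(peaks)  # local maxima; should land near 40 and 120
```

With `--use-sharp`, `EvalFilterExtractSubtitle.postprocessDifferences` skips the smoothing, trading noise robustness for frame-accurate peak positions.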