├── README.md ├── LICENSE ├── .gitignore └── meltsub.py /README.md: -------------------------------------------------------------------------------- 1 | # meltsub 2 | 3 | Takes a raw video and a hardsub video, and outputs the subtitles extracted from the hardsub video. 4 | 5 | ## Disclaimer 6 | 7 | I take no legal responsibility for anything this code is used for. This is purely an educational proof of concept. 8 | 9 | ## Usage 10 | 11 | Dependencies: 12 | * opencv 13 | * tesseract 14 | 15 | ```shell 16 | python meltsub.py 17 | ``` 18 | 19 | ## Settings 20 | 21 | Change the variable *subtitles_lang* to set another language. 22 | 23 | Use this command to list the installed languages: 24 | 25 | ```shell 26 | tesseract --list-langs 27 | ``` 28 | 29 | ## License 30 | 31 | MIT 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 emersion 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
"""Extract hardcoded subtitles by comparing two encodes of the same video.

The script reads a reference video (``softsub_path``, the "raw" video of the
README) and a video with burned-in subtitles (``hardsub_path``), estimates the
time offset between them from a few key frames, then walks both videos in
lockstep.  Whenever the difference between the two frames looks like subtitle
text, the difference image is sent to tesseract and the recognised text is
written to ``subtitles_path`` in SubRip (SRT) format.
"""

from collections.abc import Iterable
import io
import re
import shutil
import subprocess

import cv2

softsub_path = "softsub.mkv"
hardsub_path = "hardsub.mp4"
subtitles_path = "subtitles.srt"
subtitles_lang = "fra"
align_on_hardsubs = False
align_frames = 5
align_from = 3*60 # seconds

# Prefer the tesseract found on PATH; fall back to the location that used to
# be hard-coded so existing setups keep working.
tesseract_path = shutil.which("tesseract") or "/usr/bin/tesseract"


def open_video(path):
    """Open *path* with OpenCV, exiting with a clear message on failure."""
    video = cv2.VideoCapture(path)
    if not video.isOpened():
        raise SystemExit("Cannot open video: {}".format(path))
    return video


softsub_video = open_video(softsub_path)
hardsub_video = open_video(hardsub_path)

softsub_fps = softsub_video.get(cv2.CAP_PROP_FPS)
hardsub_fps = hardsub_video.get(cv2.CAP_PROP_FPS)
if softsub_fps <= 0 or hardsub_fps <= 0:
    # Every timestamp below is computed as position / fps.
    raise SystemExit("Cannot determine the frame rate of the input videos")


def median(numbers):
    """Return the median of *numbers*, or 0.0 when it is empty."""
    numbers = sorted(numbers)
    if not numbers:
        return 0.0
    center = len(numbers) // 2
    if len(numbers) % 2 == 0:
        return sum(numbers[center - 1:center + 1]) / 2.0
    return numbers[center]


def frame_sum(frame):
    """Return the sum of all values of *frame* divided by its pixel count.

    For a multi-channel frame the channels are added together, so the result
    is the sum of the per-channel means.
    """
    height, width = frame.shape[:2]
    # Reduce every row to a single column first, then add the rows up.
    s = sum(cv2.reduce(frame, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S))[0]
    if isinstance(s, Iterable):
        # Multi-channel frame: one partial sum per channel.
        s = sum(s)
    return s / (height * width)


def frame_diff(a, b):
    """Return ``frame_sum`` of the saturating difference ``a - b``."""
    diff = cv2.subtract(a, b)
    return frame_sum(diff)


def find_key_frames(video, threshold=70):
    """Collect frames of *video* that differ strongly from their predecessor.

    Reading starts at the current position of *video* and stops once
    ``align_frames`` key frames were found or the video ends.

    Returns a dict mapping the position reported by ``CAP_PROP_POS_FRAMES``
    right after reading the frame to the frame itself.
    """
    key_frames = {}

    ok, last = video.read()
    if not ok:
        return key_frames
    while len(key_frames) < align_frames:
        ok, current = video.read()
        if not ok:
            break
        d = frame_diff(last, current)
        if d > threshold:
            pos = video.get(cv2.CAP_PROP_POS_FRAMES)
            key_frames[pos] = current
            print("Found key frame:", pos, d)
        last = current

    cv2.destroyAllWindows()

    return key_frames


def match_keyframes(softsub_frames, hardsub_frames, max_diff=10):
    """Estimate the time offset between the two videos, in seconds.

    Each softsub key frame is paired with the hardsub key frame that has the
    smallest ``frame_diff`` to it, ignoring candidates whose difference
    exceeds *max_diff*.  The result is the median of
    ``softsub_time - hardsub_time`` over all pairs, or 0.0 when no pair was
    found.
    """
    matches = []
    for softsub_pos, softsub_frame in softsub_frames.items():
        best_diff = float("inf")
        best_frame = None
        best_pos = -1

        for hardsub_pos, hardsub_frame in hardsub_frames.items():
            d = frame_diff(softsub_frame, hardsub_frame)
            if d > max_diff:
                continue
            if d < best_diff:
                best_diff = d
                best_frame = hardsub_frame
                best_pos = hardsub_pos

        if best_frame is None:
            continue

        pos_diff_sec = softsub_pos/softsub_fps - best_pos/hardsub_fps
        print("image_diff={} pos_diff_sec={}".format(best_diff, pos_diff_sec))

        matches.append(pos_diff_sec)

    cv2.destroyAllWindows()

    if not matches:
        print("Warning: no matching key frames, assuming the videos are aligned")

    return median(matches)


def ocr(img):
    """Run tesseract on *img* and return the recognised non-empty lines.

    The image is encoded as BMP and piped to tesseract, using
    ``subtitles_lang`` as the recognition language.  Lines are right-stripped
    and joined with newlines.

    Raises:
        Exception: if OpenCV cannot encode *img*.
    """
    ok, buf = cv2.imencode(".bmp", img)
    if not ok:
        raise Exception("Cannot encode image")

    args = [tesseract_path, "stdin", "stdout", "-l", subtitles_lang]
    p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

    # communicate() feeds stdin and drains stdout together, so neither pipe
    # can fill up and block the other side.
    out, _ = p.communicate(buf.tobytes())

    lines = []
    for line in io.TextIOWrapper(io.BytesIO(out), encoding="utf-8"):
        line = line.rstrip()
        if not line:
            continue
        lines.append(line)

    return "\n".join(lines)


def timecode(ms):
    """Format a duration in milliseconds as an SRT timecode ``HH:MM:SS,mmm``."""
    s, ms = divmod(ms, 1000)
    minutes, s = divmod(s, 60)
    h, minutes = divmod(minutes, 60)
    return "{:02}:{:02}:{:02},{:03}".format(h, minutes, s, ms)


# Character normalisation applied for every language.
unicode_replacements = [
    ("\n—", "\n-"),
    ("…", "..."),
    ("‘", "'"),
]

# OCR fix-ups that only make sense for one language, keyed by tesseract
# language code.  Applying the French ones to e.g. English would turn "I'm"
# into "l'm".
language_replacements = {
    "fra": [
        ("II", "Il"),
        ("I'", "l'"),
    ],
}

replacements = unicode_replacements + language_replacements.get(subtitles_lang, [])


def cleanup(text):
    """Apply ``replacements`` to *text* and strip surrounding whitespace."""
    # Surround with newlines so that patterns anchored on "\n" also match on
    # the first line.
    text = "\n"+text+"\n"

    for (a, b) in replacements:
        text = text.replace(a, b)

    return text.strip()


def _write_sub(f, sub_index, sub_start, sub_end, sub_frame):
    """OCR *sub_frame* and append one SRT entry to *f*.

    Candidates lasting 300 ms or less, or whose OCR result is empty, are
    dropped.  Returns True when an entry was written.
    """
    if sub_end - sub_start <= 300:
        print()
        return False

    text = cleanup(ocr(sub_frame))
    if not text:
        print()
        return False

    print(text)
    f.write("{}\n".format(sub_index))
    f.write("{} --> {}\n".format(timecode(sub_start), timecode(sub_end)))
    f.write(text+"\n\n")
    return True


def extract_subs(f, softsub_video, hardsub_video, pos_diff_sec, threshold=5, wait_dur=1):
    """Walk both videos in lockstep and write the detected subtitles to *f*.

    Args:
        f: text file object receiving the SRT entries.
        softsub_video: capture of the reference video.
        hardsub_video: capture of the video with burned-in subtitles.
        pos_diff_sec: softsub time minus hardsub time, in seconds.
        threshold: upper bound of the difference score for a frame to count
            as showing a subtitle.
        wait_dur: milliseconds passed to ``cv2.waitKey`` for the preview
            window; a negative value disables the preview.  In the preview,
            space pauses, ``s`` saves the current image to ``output.png`` and
            ``q`` stops the extraction.
    """
    # SRT counters start at 1 and only advance for entries actually written.
    sub_index = 1
    sub_frame = None
    sub_start = 0
    t = 0
    s = 0
    while True:
        ok, softsub_frame = softsub_video.read()
        if not ok:
            break

        softsub_pos = softsub_video.get(cv2.CAP_PROP_POS_FRAMES)
        softsub_t = softsub_pos/softsub_fps

        # Advance the hardsub video until it catches up with the softsub one.
        hardsub_frame = None
        hardsub_t = 0
        hardsub_eof = False
        while True:
            hardsub_pos = hardsub_video.get(cv2.CAP_PROP_POS_FRAMES)
            hardsub_t = hardsub_pos/hardsub_fps

            if hardsub_t >= softsub_t - pos_diff_sec:
                break

            ok, hardsub_frame = hardsub_video.read()
            if not ok:
                hardsub_eof = True
                break
        if hardsub_eof:
            break
        if hardsub_frame is None:
            # No hardsub frame was read for this softsub frame.
            continue

        # Subtracting the inverted frames keeps only the pixels where the
        # hardsub frame is brighter than the softsub one.
        diff = cv2.subtract(255-softsub_frame, 255-hardsub_frame)
        diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        diff = cv2.multiply(diff, 2)
        _, diff = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)

        # Score of the binarised difference, taken before the inversion below.
        s = frame_sum(diff)

        # Invert so that the differing pixels become dark on a white background.
        diff = 255 - diff

        if wait_dur >= 0:
            cv2.imshow("diff", diff)
            key = cv2.waitKey(wait_dur)
            if key == ord(" "):
                key = cv2.waitKey(0)
            if key == ord("q"):
                break
            if key == ord("s"):
                cv2.imwrite("output.png", diff)

        t = softsub_t
        if align_on_hardsubs:
            t = hardsub_t

        if 0.1 < s < threshold:
            if sub_frame is None:
                # A subtitle just appeared: remember its first frame for OCR.
                sub_frame = diff
                sub_start = int(t * 1000)
                print("{} ({:.2f}) - ".format(timecode(sub_start), s), end="", flush=True)
        elif sub_frame is not None:
            sub_end = int(t * 1000)
            print("{} ({:.2f}) ".format(timecode(sub_end), s), end="")
            if _write_sub(f, sub_index, sub_start, sub_end, sub_frame):
                sub_index += 1
            sub_frame = None

    # A subtitle still on screen when the loop stops would otherwise be lost.
    if sub_frame is not None:
        sub_end = int(t * 1000)
        print("{} ({:.2f}) ".format(timecode(sub_end), s), end="")
        _write_sub(f, sub_index, sub_start, sub_end, sub_frame)

    cv2.destroyAllWindows()


try:
    print("Aligning videos on {} frames...".format(align_frames))

    softsub_video.set(cv2.CAP_PROP_POS_FRAMES, align_from * softsub_fps)
    hardsub_video.set(cv2.CAP_PROP_POS_FRAMES, align_from * hardsub_fps)
    softsub_key_frames = find_key_frames(softsub_video)
    hardsub_key_frames = find_key_frames(hardsub_video)
    softsub_video.set(cv2.CAP_PROP_POS_FRAMES, 0)
    hardsub_video.set(cv2.CAP_PROP_POS_FRAMES, 0)

    # pos_diff_sec = softsub_pos - hardsub_pos
    pos_diff_sec = match_keyframes(softsub_key_frames, hardsub_key_frames)
    print("pos_diff_sec={}".format(pos_diff_sec))

    print("Writing {} subtitles to {}...".format(subtitles_lang, subtitles_path))

    # Explicit UTF-8: the subtitles contain non-ASCII text and the default
    # encoding depends on the platform locale.
    with open(subtitles_path, "w", encoding="utf-8") as f:
        extract_subs(f, softsub_video, hardsub_video, pos_diff_sec)
finally:
    softsub_video.release()
    hardsub_video.release()

    cv2.destroyAllWindows()