├── README.md
├── LICENSE
├── .gitignore
└── meltsub.py


/README.md:
--------------------------------------------------------------------------------
 1 | # meltsub
 2 | 
 3 | Takes a raw video and a hardsubbed copy of the same video, and outputs the subtitles extracted from the hardsubbed one.
 4 | 
 5 | ## Disclaimer
 6 | 
 7 | I take no legal responsibility for anything this code is used for. This is purely an educational proof of concept.
 8 | 
 9 | ## Usage
10 | 
11 | Dependencies:
12 | * opencv
13 | * tesseract
14 | 
15 | ```shell
16 | python meltsub.py
17 | ```
18 | 
19 | ## Settings
20 | 
21 | Change the variable *subtitles_lang* to set another language.
22 | 
23 | Use this command to list the installed languages:
24 | 
25 | ```shell
26 | tesseract --list-langs
27 | ```
28 | 
29 | ## License
30 | 
31 | MIT
32 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 emersion
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/meltsub.py:
--------------------------------------------------------------------------------
try:
	# The ABC aliases were removed from the top-level `collections` module in
	# Python 3.10; `collections.abc` is the supported location.
	from collections.abc import Iterable
except ImportError:
	from collections import Iterable
import io
import re
import subprocess

import cv2
  7 | 
# --- Settings -----------------------------------------------------------

# Video without burnt-in subtitles (the "raw" video in the README).
softsub_path = "softsub.mkv"
# Video with the subtitles burnt into the picture.
hardsub_path = "hardsub.mp4"
# Output file; cues are written in SubRip (.srt) layout by extract_subs().
subtitles_path = "subtitles.srt"
# Language code passed to tesseract via `-l` (see `tesseract --list-langs`).
subtitles_lang = "fra"
# When True, cue timestamps use the hardsub video's clock instead of the
# softsub video's clock.
align_on_hardsubs = False
# Number of key frames collected from each video for the alignment step.
align_frames = 5
# Position at which the key-frame search starts in both videos.
align_from = 3*60 # seconds

# Both captures are opened at import time and shared by the code below.
softsub_video = cv2.VideoCapture(softsub_path)
hardsub_video = cv2.VideoCapture(hardsub_path)

# Frame rates as reported by OpenCV; used to convert frame positions to seconds.
softsub_fps = softsub_video.get(cv2.CAP_PROP_FPS)
hardsub_fps = hardsub_video.get(cv2.CAP_PROP_FPS)
 21 | 
 22 | def median(numbers):
 23 | 	numbers = sorted(numbers)
 24 | 	center = len(numbers) // 2
 25 | 	if len(numbers) % 2 == 0:
 26 | 		return sum(numbers[center - 1:center + 1]) / 2.0
 27 | 	else:
 28 | 		return numbers[center]
 29 | 
 30 | def frame_sum(frame):
 31 | 	height, width = frame.shape[:2]
 32 | 	s = sum(cv2.reduce(frame, 1, cv2.REDUCE_SUM, dtype=cv2.CV_32S))[0]
 33 | 	if isinstance(s, Iterable):
 34 | 		s = sum(s)
 35 | 	return s / (height * width)
 36 | 
 37 | def frame_diff(a, b):
 38 | 	diff = cv2.subtract(a, b)
 39 | 	#diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
 40 | 	return frame_sum(diff)
 41 | 
 42 | def find_key_frames(video, threshold=70):
 43 | 	key_frames = {}
 44 | 
 45 | 	fps = video.get(cv2.CAP_PROP_FPS)
 46 | 	#resolution = video.get(cv2.CAP_PROP_FRAME_WIDTH) * video.get(cv2.CAP_PROP_FRAME_HEIGHT)
 47 | 
 48 | 	ok, last = video.read()
 49 | 	if not ok:
 50 | 		return key_frames
 51 | 	while(len(key_frames) < align_frames):
 52 | 		ok, current = video.read()
 53 | 		if not ok:
 54 | 			break
 55 | 		#diff = cv2.subtract(last, current)
 56 | 		#cv2.imshow("last", last)
 57 | 		#cv2.imshow("current", current)
 58 | 		#cv2.imshow("diff", diff)
 59 | 		d = frame_diff(last, current)
 60 | 		if d > threshold:
 61 | 			pos = video.get(cv2.CAP_PROP_POS_FRAMES)
 62 | 			key_frames[pos] = current
 63 | 			print("Found key frame:", pos, d)
 64 | 			#cv2.waitKey(0)
 65 | 		#else:
 66 | 		#	cv2.waitKey(int(1/fps*1000))
 67 | 		last = current
 68 | 
 69 | 	cv2.destroyAllWindows()
 70 | 
 71 | 	return key_frames
 72 | 
 73 | def match_keyframes(softsub_frames, hardsub_frames, max_diff=10):
 74 | 	matches = []
 75 | 	for softsub_pos, softsub_frame in softsub_frames.items():
 76 | 		best_diff = float("inf")
 77 | 		best_frame = None
 78 | 		best_pos = -1
 79 | 
 80 | 		for hardsub_pos, hardsub_frame in hardsub_frames.items():
 81 | 			d = frame_diff(softsub_frame, hardsub_frame)
 82 | 			if d > max_diff:
 83 | 				continue
 84 | 			if d < best_diff:
 85 | 				best_diff = d
 86 | 				best_frame = hardsub_frame
 87 | 				best_pos = hardsub_pos
 88 | 
 89 | 		if best_frame is None:
 90 | 			continue
 91 | 
 92 | 		pos_diff_sec = softsub_pos/softsub_fps - best_pos/hardsub_fps
 93 | 		print("image_diff={} pos_diff_sec={}".format(best_diff, pos_diff_sec))
 94 | 
 95 | 		#cv2.imshow("softsub", softsub_frame)
 96 | 		#cv2.imshow("hardsub", best_frame)
 97 | 		#cv2.waitKey(0)
 98 | 
 99 | 		matches.append(pos_diff_sec)
100 | 
101 | 	cv2.destroyAllWindows()
102 | 
103 | 	return median(matches)
104 | 
def ocr(img):
	"""Run tesseract on `img` and return the recognised text.

	The image is BMP-encoded in memory and piped to `/usr/bin/tesseract`,
	which is asked to read from stdin, write to stdout and use the
	module-level `subtitles_lang`. Its stderr is discarded and its exit
	status is not checked.

	Returns:
		The non-empty output lines, right-stripped and joined with "\n"
		(an empty string if tesseract printed nothing).

	Raises:
		Exception: if OpenCV cannot encode the image.
	"""
	ok, buf = cv2.imencode(".bmp", img)
	if not ok:
		raise Exception("Cannot encode image")

	args = ["/usr/bin/tesseract", "stdin", "stdout", "-l", subtitles_lang]
	p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

	# communicate() feeds stdin and drains stdout together, then waits for the
	# child and closes the pipes. Writing all of stdin first and reading stdout
	# afterwards can deadlock once an OS pipe buffer fills up.
	out, _ = p.communicate(buf.tobytes())

	lines = []
	# Decode through TextIOWrapper so line splitting matches reading the pipe
	# as a text stream.
	for line in io.TextIOWrapper(io.BytesIO(out), encoding="utf-8"):
		line = line.rstrip()
		if line:
			lines.append(line)

	return "\n".join(lines)
125 | 
def timecode(ms):
	"""Format a duration in milliseconds as `HH:MM:SS,mmm`.

	Hours, minutes and seconds are zero-padded to at least two digits and
	milliseconds to at least three; hours are not capped at 99.
	"""
	total_seconds, millis = divmod(ms, 1000)
	total_minutes, seconds = divmod(total_seconds, 60)
	hours, minutes = divmod(total_minutes, 60)
	return "{:02}:{:02}:{:02},{:03}".format(hours, minutes, seconds, millis)
131 | 
# (search, replacement) pairs applied in order by cleanup().
replacements = [
	# Unicode punctuation -> ASCII
	("\n—", "\n-"),  # dash at the start of a line only
	("…", "..."),
	("‘", "'"),

	# French: capital "I" where a lowercase "l" is expected
	("II", "Il"),
	("I'", "l'"),
]

def cleanup(text):
	"""Apply every pair in `replacements` to `text` and strip the result.

	The text is wrapped in newlines before replacing so that patterns
	anchored on a leading "\n" also match on the very first line; the
	final strip() removes that padding together with any other surrounding
	whitespace.
	"""
	padded = "".join(("\n", text, "\n"))
	for old, new in replacements:
		padded = padded.replace(old, new)
	return padded.strip()
150 | 
def _finish_subtitle(f, cue_number, sub_start, sub_end, sub_frame):
	"""OCR one finished subtitle and append it to `f` as an SRT cue.

	Subtitles of 300 ms or less, or whose OCR text is empty after cleanup(),
	are skipped. Returns True when a cue was written, False otherwise.
	"""
	if sub_end - sub_start <= 300:
		print("<skipped: too quick>")
		return False

	text = cleanup(ocr(sub_frame))
	if len(text) == 0:
		print("<skipped: no data>")
		return False

	print(text)
	f.write("{}\n".format(cue_number))
	f.write("{} --> {}\n".format(timecode(sub_start), timecode(sub_end)))
	f.write(text+"\n\n")
	return True

def extract_subs(f, softsub_video, hardsub_video, pos_diff_sec):
	"""Walk both videos in step and write the detected subtitles to `f`.

	For each softsub frame the hardsub video is advanced until its timestamp
	reaches the softsub timestamp minus `pos_diff_sec`. The two frames are
	then subtracted and binarised; frame_sum() of that mask decides whether a
	subtitle is on screen. When a subtitle ends, the mask captured on its
	first frame is sent to ocr() and the text is written as an SRT cue.

	Cues are numbered sequentially from 1 and only written cues consume a
	number. A subtitle still on screen when the loop stops (end of either
	video, or "q" pressed) is flushed using the last computed timestamp.

	Parameters:
		f: writable text file receiving the SRT cues.
		softsub_video: capture of the video without burnt-in subtitles.
		hardsub_video: capture of the video with burnt-in subtitles.
		pos_diff_sec: softsub time minus hardsub time, in seconds.

	Reads the module-level `softsub_fps`, `hardsub_fps` and
	`align_on_hardsubs`. While running, a preview window shows the mask:
	space pauses, "q" stops, "s" saves the mask to output.png.
	"""
	# A mask score in the open interval (0.1, threshold) counts as "subtitle
	# visible".
	threshold = 5
	# Delay passed to cv2.waitKey() for the preview; a negative value disables
	# the preview entirely.
	wait_dur = 1

	cue_number = 1
	sub_frame = None
	sub_start = 0
	t = 0
	while True:
		ok, softsub_frame = softsub_video.read()
		if not ok:
			break

		softsub_pos = softsub_video.get(cv2.CAP_PROP_POS_FRAMES)
		softsub_t = softsub_pos/softsub_fps

		# Advance the hardsub video until it catches up with the softsub one.
		hardsub_frame = None
		hardsub_t = 0
		hardsub_eof = False
		while True:
			hardsub_pos = hardsub_video.get(cv2.CAP_PROP_POS_FRAMES)
			hardsub_t = hardsub_pos/hardsub_fps

			if hardsub_t >= softsub_t - pos_diff_sec:
				break

			ok, hardsub_frame = hardsub_video.read()
			if not ok:
				hardsub_eof = True
				break
		if hardsub_eof:
			break
		if hardsub_frame is None:
			# The hardsub video is already ahead: skip this softsub frame.
			continue

		# (255 - soft) - (255 - hard) is hard - soft; presumably cv2.subtract
		# clamps negative results to 0, which would keep only pixels where the
		# hardsub frame is brighter -- confirm against the OpenCV docs.
		diff = cv2.subtract(255-softsub_frame, 255-hardsub_frame)
		diff = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
		diff = cv2.multiply(diff, 2)
		_, diff = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)

		# Score the mask before inverting it.
		s = frame_sum(diff)

		# Invert the mask; this inverted image is what gets shown and OCR'd.
		diff = 255 - diff

		if wait_dur >= 0:
			cv2.imshow("diff", diff)
			key = cv2.waitKey(wait_dur)
			if key == ord(" "):
				key = cv2.waitKey(0)
			if key == ord("q"):
				break
			if key == ord("s"):
				cv2.imwrite("output.png", diff)

		t = softsub_t
		if align_on_hardsubs:
			t = hardsub_t

		if s > 0.1 and s < threshold:
			if sub_frame is None:
				# A subtitle just appeared: remember its first mask and time.
				sub_frame = diff
				sub_start = int(t * 1000)
				print("{} ({:.2f}) - ".format(timecode(sub_start), s), end="", flush=True)
		elif sub_frame is not None:
			# The subtitle just disappeared.
			sub_end = int(t * 1000)
			print("{} ({:.2f}) ".format(timecode(sub_end), s), end="")
			if _finish_subtitle(f, cue_number, sub_start, sub_end, sub_frame):
				cue_number += 1
			sub_frame = None

	if sub_frame is not None:
		# The loop stopped while a subtitle was still on screen.
		sub_end = int(t * 1000)
		print("{} (end) ".format(timecode(sub_end)), end="")
		_finish_subtitle(f, cue_number, sub_start, sub_end, sub_frame)

	cv2.destroyAllWindows()
245 | 
# --- Script entry: align the two videos, then extract the subtitles ---------

print("Aligning videos on {} frames...".format(align_frames))

try:
	# Search for key frames starting `align_from` seconds into each video,
	# then rewind both so extraction starts from the first frame.
	softsub_video.set(cv2.CAP_PROP_POS_FRAMES, align_from * softsub_fps)
	hardsub_video.set(cv2.CAP_PROP_POS_FRAMES, align_from * hardsub_fps)
	softsub_key_frames = find_key_frames(softsub_video)
	hardsub_key_frames = find_key_frames(hardsub_video)
	softsub_video.set(cv2.CAP_PROP_POS_FRAMES, 0)
	hardsub_video.set(cv2.CAP_PROP_POS_FRAMES, 0)

	# pos_diff_sec = softsub_pos - hardsub_pos
	pos_diff_sec = match_keyframes(softsub_key_frames, hardsub_key_frames)
	print("pos_diff_sec={}".format(pos_diff_sec))

	print("Writing {} subtitles to {}...".format(subtitles_lang, subtitles_path))

	# Write UTF-8 explicitly: the OCR text contains non-ASCII characters and
	# the platform default encoding may be unable to represent them.
	with open(subtitles_path, "w", encoding="utf-8") as f:
		extract_subs(f, softsub_video, hardsub_video, pos_diff_sec)
finally:
	# Release the captures and close any preview window even if a step failed.
	softsub_video.release()
	hardsub_video.release()

	cv2.destroyAllWindows()
268 | 


--------------------------------------------------------------------------------