├── README.md
├── config.txt
├── launcher.py
├── screenshot.png
├── ui
│   ├── ui.css
│   ├── ui.html
│   └── ui.js
└── video2sub.py
/README.md:
--------------------------------------------------------------------------------
1 | # video2sub
2 |
3 | ## 硬字幕OCR提取工具
4 |
5 | video2sub 提供图形界面,方便执行字幕 OCR 识别、编辑、校对等操作。
6 |
7 | video2sub 本身没有实现任何 OCR 识别算法,而是使用外部库进行 OCR 识别。目前支持 [chineseocr](https://github.com/chineseocr/chineseocr) 和[百度文字识别 API](https://ai.baidu.com/tech/ocr/general)。其中 chineseocr 支持离线运行,而百度文字识别 API 需要联网运行。建议先用 chineseocr 识别出哪些帧含有字幕,再调用百度文字识别 API 进行精确识别。
8 |
9 | 
10 |
11 | ## 下载地址
12 |
13 | - [**百度网盘**](https://pan.baidu.com/s/1sLc22mr1PUh0X2HYXnAQzg) (提取码: **ydkn**)
14 | - [**GitHub**](https://github.com/zhangboyang/video2sub/releases)
15 |
16 | |文件名|说明|
17 | |-|-|
18 | |video2sub-20210331.7z|下载后解压并运行 **launcher.exe** 即可。|
19 | |chineseocr-gpu.7z|GPU 版 chineseocr 识别引擎,需要 NVIDIA 显卡。下载后解压并运行 **chineseocr-gpu.exe** 即可。|
20 | |chineseocr-cpu.7z|CPU 版 chineseocr 识别引擎,无需显卡,但速度很慢,不建议使用。|
21 |
22 | ## 系统需求
23 |
24 | - Windows 7 (64位)或更高版本
25 | - 8GB 以上内存(若不使用 GPU 则 4GB 即可)
26 | - NVIDIA 显卡:用于 chineseocr 加速(纯 CPU 也能运行,就是太慢)
27 |
--------------------------------------------------------------------------------
/config.txt:
--------------------------------------------------------------------------------
1 | # 请按Python语法编辑此文件(代码使用ast.literal_eval读入此文件)
2 |
3 | {
4 | ## 通用设置 ######################################################
5 |
6 | 'host': '127.0.0.1',
7 | 'port': [20000, 21000], # 前端与后端的通信端口范围(左闭右开区间)
8 |
9 | 'max_log': 1500, # (显示出来的)最大日志条目项数
10 | 'max_checkpoint': 1500, # (显示出来的)最大恢复点项数
11 |
12 | 'fix_ntsc_fps': True, # 修正NTSC帧率(如29.970改为30000/1001)
13 |
14 | 'export_suffix': '.chs', # 导出文件的后缀
15 | 'export_overwrite': False, # 导出时是否覆盖已存在文件
16 |
17 | ## UI 界面设置 ######################################################
18 |
19 | 'thumbnail': { # 缩略图设置
20 | 'height': 150, # 高度
21 | 'npart': 10, # 同一文件中合并存储几张缩略图
22 | 'jpg_quality': 85 # JPG质量参数
23 | },
24 |
25 | 'subthumb': { # 字幕预览设置
26 | 'width': 470, # 宽度(只对新建的字幕有效)
27 | 'jpg_quality': 90 # JPG质量参数
28 | },
29 |
30 | 'default': { # 默认设置
31 | 'OCR': {
32 | 'engine': 'chineseocr:multi', # 默认OCR引擎名
33 | 'top': -1, # 默认字幕区域Y1
34 | 'bottom': -1, # 默认字幕区域Y2
35 | },
36 | 'UI': {
37 | 'editorfontsize': 30, # 默认编辑框字体大小
38 | },
39 | },
40 |
41 | ## OCR 引擎设置 ######################################################
42 |
43 | 'allengines': [ # 引擎名字
44 | ['chineseocr:multi', 'chineseocr(多行模式) -- 推荐“新OCR”使用'],
45 | ['chineseocr:single', 'chineseocr(单行模式)'],
46 | ['baiduocr:accurate', '百度OCR(高精度,批量) -- 推荐“重新OCR”使用'],
47 | ['baiduocr:accurate_basic', '百度OCR(高精度,单独) -- 推荐“空项OCR”使用'],
48 | ['baiduocr:general', '百度OCR(标准版,批量)'],
49 | ['baiduocr:general_basic', '百度OCR(标准版,单独)'],
50 | ['dummyocr', 'dummyocr(调试用)'],
51 | ],
52 |
53 | #### chineseocr设置
54 | 'chineseocr': {
55 | 'url': 'http://127.0.0.1:8080/ocr', # API地址
56 | },
57 |
58 | #### 百度OCR设置
59 | 'baiduocr': {
60 | # API介绍 https://ai.baidu.com/tech/ocr/general
61 | # 账号申请方法 https://cloud.baidu.com/doc/OCR/s/dk3iqnq51
62 | # 请把下面的API_KEY、SECRET_KEY改为自己申请的账号
63 | 'API_KEY': 'GmhC18eVP1Fo1ECX911dtOzw',
64 | 'SECRET_KEY': 'PQ2ukO4Aec2PTsgQU9UkiEKYciavlZk8',
65 |
66 | 'qps_limit': 2, # 每秒调用次数限制
67 | 'batch_size': 20, # 批处理大小
68 | 'language_type': 'CHN_ENG', # 语言类型,参见 https://ai.baidu.com/ai-doc/OCR/1k3h7y3db
69 |
70 | 'TOKEN_URL': 'https://aip.baidubce.com/oauth/2.0/token',
71 | 'OCR_URL': 'https://aip.baidubce.com/rest/2.0/ocr/v1/',
72 | },
73 |
74 | #### dummyocr(调试用)
75 | 'dummyocr': {
76 | 'always_error': 0,
77 | 'text': '测试',
78 | },
79 |
80 | ## ASS 导出设置 ######################################################
81 |
82 | 'ass_format': {
83 | 'header': ################ ASS文件头模板
84 | '''[Script Info]
85 | Title: {{文件名}}
86 | ScriptType: v4.00+
87 | WrapStyle: 0
88 | ScaledBorderAndShadow: yes
89 | YCbCr Matrix: None
90 | PlayResX: {{视频宽度}}
91 | PlayResY: {{视频高度}}
92 |
93 | [V4+ Styles]
94 | Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
95 | Style: Default,方正兰亭圆_GBK_准,{{字幕高度}},&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,{{下半屏底边距}},1
96 | Style: TopHalf,方正兰亭圆_GBK_准,{{字幕高度}},&H00FFFFFF,&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,0,8,10,10,{{上半屏顶边距}},1
97 |
98 | [Events]
99 | Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
100 | ''',
101 | 'top_half': ################ ASS字幕行模板(上半屏)
102 | '''Dialogue: 0,{{开始时间}},{{结束时间}},TopHalf,,0,0,0,,{{字幕文本}}
103 | ''',
104 | 'bottom_half': ################ ASS字幕行模板(下半屏)
105 | '''Dialogue: 0,{{开始时间}},{{结束时间}},Default,,0,0,0,,{{字幕文本}}
106 | ''',
107 | 'time_alignment': { ################ ASS字幕时间对齐方式
108 | 'start': ('floor', +0.5, +1e-3, 0), 'end': ('floor', +0.5, +1e-3, 0), # 适用于VLC播放器
109 | #'start': ('floor', 0, +1e-3, 0), 'end': ('floor', 0, +1e-3, 0), # 适用于MPC-HC播放器
110 | },
111 | },
112 |
113 | }
114 |
--------------------------------------------------------------------------------
/launcher.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import ast
3 | import sys
4 | import os
5 | import subprocess
6 | import json
7 | import urllib.request
8 | import time
9 | import traceback
10 | import secrets
11 | import socket
12 | from threading import Thread
13 | from datetime import datetime
14 | if os.name == 'nt':
15 | import msvcrt
16 | import win32con
17 | import win32console
18 | import win32api
19 | import win32gui
20 | import win32file
21 | import winerror
22 | windows = True
23 | else:
24 | import signal
25 | import fcntl
26 | windows = False
27 |
28 | if windows:
class LockedFileWriter:
    """Writer that truncates an existing file and keeps it open (Windows).

    The file is opened through ``win32file.CreateFile`` with
    ``FILE_SHARE_READ`` and ``TRUNCATE_EXISTING``; the handle stays open
    until ``close()`` is called.  Can also be used as a context manager.
    """

    def __init__(self, path):
        """Open ``path``, retrying every 0.1 s while a sharing violation occurs.

        Any other ``win32file.error`` is re-raised unchanged.
        """
        while True:
            try:
                self.handle = win32file.CreateFile(
                    path,
                    win32file.GENERIC_READ | win32file.GENERIC_WRITE,
                    win32file.FILE_SHARE_READ,
                    None,
                    win32file.TRUNCATE_EXISTING,
                    0,
                    None)
                break
            except win32file.error as e:
                if e.winerror != winerror.ERROR_SHARING_VIOLATION:
                    raise
                # Somebody else still holds the file; wait and try again.
                time.sleep(0.1)

    def write(self, b):
        """Write the bytes ``b``; raise ``OSError`` on a short write."""
        n = win32file.WriteFile(self.handle, b)[1]
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if n != len(b):
            raise OSError('short write: %d of %d bytes' % (n, len(b)))

    def close(self):
        """Close the underlying handle."""
        self.handle.Close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()
def file_select(title, flt):
    """Show the Win32 "open file" dialog and return the chosen paths.

    ``title`` is the dialog caption and ``flt`` the NUL-separated filter
    string passed straight to ``GetOpenFileNameW``.  Returns a list of
    paths; an empty list is returned when the dialog call raises
    ``win32gui.error``.
    """
    dialog_flags = (
        win32con.OFN_ALLOWMULTISELECT
        | win32con.OFN_PATHMUSTEXIST
        | win32con.OFN_FILEMUSTEXIST
        | win32con.OFN_HIDEREADONLY
        | win32con.OFN_EXPLORER
        | win32con.OFN_DONTADDTORECENT
        | win32con.OFN_NOCHANGEDIR
    )
    try:
        result = win32gui.GetOpenFileNameW(
            hwndOwner=win32console.GetConsoleWindow(),
            Title=title,
            MaxFile=1048576,
            Flags=dialog_flags,
            Filter=flt)
    except win32gui.error:
        return []
    parts = result[0].split('\0')
    if len(parts) <= 1:
        # A single entry is used as-is.
        return parts
    # Several entries: the first one is joined in front of each of the rest.
    folder = parts[0]
    return [os.path.join(folder, name) for name in parts[1:]]
class ListSelectDialog:
    """Modal Win32 dialog that lets the user pick one entry from a list.

    ``lst`` is a sequence of items whose element ``[1]`` is the text shown
    in the list box.  After ``DoModel()`` returns, ``selitem`` holds the
    chosen item of ``lst`` or ``None`` if the dialog was cancelled.
    """
    # Window class name registered for the dialog.
    className = 'VIDEO2SUB_SELECTDLG'
    def __init__(self, title, msg, lst):
        """Store the dialog texts/items and register the window class."""
        self.title = title
        self.msg = msg
        self.lst = lst
        self.selitem = None
        wc = win32gui.WNDCLASS()
        wc.style = win32con.CS_VREDRAW | win32con.CS_HREDRAW
        wc.SetDialogProc()
        wc.cbWndExtra = win32con.DLGWINDOWEXTRA
        wc.hInstance = win32gui.dllhandle
        wc.hCursor = win32gui.LoadCursor(0, win32con.IDC_ARROW)
        wc.hbrBackground = win32con.COLOR_WINDOW + 1
        wc.lpszClassName = self.className
        try:
            win32gui.RegisterClass(wc)
        except win32gui.error as e:
            # The class stays registered after the first dialog; only other
            # registration failures are real errors.
            if e.winerror != winerror.ERROR_CLASS_ALREADY_EXISTS:
                raise e
    def DoModel(self):
        """Run the dialog modally, owned by the console window."""
        style = win32con.DS_SETFONT | win32con.DS_MODALFRAME | win32con.WS_POPUP | win32con.WS_SYSMENU | win32con.WS_VISIBLE | win32con.WS_CAPTION | win32con.CS_DBLCLKS
        s = win32con.WS_CHILD | win32con.WS_VISIBLE
        # First entry is the dialog itself, the rest are its controls.
        # NOTE(review): 128 / 130 / 131 are presumably the predefined dialog
        # control classes Button / Static / ListBox -- confirm against the
        # DLGITEMTEMPLATE documentation.  Control id 1000 is the list box.
        win32gui.DialogBoxIndirect(win32gui.dllhandle, [
            [self.title, (0, 0, 180, 148), style, None, (12, "宋体"), None, self.className],
            [128, "确定", win32con.IDOK, (68, 127, 50, 14), s | win32con.WS_TABSTOP | win32con.BS_DEFPUSHBUTTON],
            [128, "取消", win32con.IDCANCEL, (123, 127, 50, 14), s | win32con.WS_TABSTOP | win32con.BS_PUSHBUTTON],
            [130, self.msg, -1, (7, 7, 166, 13), s | win32con.SS_LEFT],
            [131, None, 1000, (7, 22, 166, 98), s | win32con.WS_TABSTOP | win32con.LBS_NOINTEGRALHEIGHT | win32con.LBS_NOTIFY | win32con.WS_VSCROLL | win32con.WS_BORDER]
        ], win32console.GetConsoleWindow(), {
            win32con.WM_COMMAND: self.OnCommand,
            win32con.WM_INITDIALOG: self.OnInitDialog,
        })
    def OnCommand(self, hwnd, msg, wparam, lparam):
        """Handle WM_COMMAND: cancel, or accept the current list selection."""
        if wparam == win32con.IDCANCEL:
            self.selitem = None
            win32gui.EndDialog(hwnd, 0)
        elif wparam == win32con.IDOK or (win32api.LOWORD(wparam) == 1000 and win32api.HIWORD(wparam) == win32con.LBN_DBLCLK):
            # OK button, or a double-click notification from the list box.
            listbox = win32gui.GetDlgItem(hwnd, 1000)
            sel = win32gui.SendMessage(listbox, win32con.LB_GETCURSEL, 0, 0)
            # Map the list-box index back to the item; out-of-range means
            # nothing is selected.
            self.selitem = self.lst[sel] if 0 <= sel and sel < len(self.lst) else None
            if self.selitem is not None:
                win32gui.EndDialog(hwnd, 0)
            else:
                win32api.MessageBox(hwnd, '请从列表中选择一个项目', self.title, win32con.MB_ICONWARNING)
    def OnInitDialog(self, hwnd, msg, wparam, lparam):
        """Handle WM_INITDIALOG: centre the dialog and fill the list box."""
        l, t, r, b = win32gui.GetWindowRect(hwnd)
        # Centre over the parent window, or over the desktop if there is none.
        pl, pt, pr, pb = win32gui.GetWindowRect(win32gui.GetParent(hwnd) if win32gui.GetParent(hwnd) else win32gui.GetDesktopWindow())
        xoff = ((pr - pl) - (r - l)) // 2
        yoff = ((pb - pt) - (b - t)) // 2
        win32gui.SetWindowPos(hwnd, win32con.HWND_TOP, pl + xoff, pt + yoff, 0, 0, win32con.SWP_NOSIZE)
        listbox = win32gui.GetDlgItem(hwnd, 1000)
        for item in self.lst:
            # item[1] is the display text of the entry.
            win32gui.SendMessage(listbox, win32con.LB_ADDSTRING, 0, item[1])
        # No entry is pre-selected (pre-selection is intentionally disabled):
        #win32gui.SendMessage(listbox, win32con.LB_SETCURSEL, 0, 0)
def list_select(title, msg, lst):
    """Ask the user to choose one item of ``lst`` in a modal dialog.

    Returns the chosen item, or ``None`` when nothing was chosen.
    """
    print('请在弹出的窗口中进行选择')
    dialog = ListSelectDialog(title, msg, lst)
    dialog.DoModel()
    chosen = dialog.selitem
    return chosen
125 | else:
class LockedFileWriter:
    """Writer that locks and truncates an existing file (POSIX).

    The file is opened in ``'r+b'`` mode (so it must already exist), an
    exclusive ``flock`` is taken, and only then is the content truncated.
    The lock is held until ``close()``.  Can also be used as a context
    manager.
    """

    def __init__(self, path):
        """Open ``path``, wait for the exclusive lock, then empty the file."""
        self.f = open(path, 'r+b')
        try:
            fcntl.flock(self.f.fileno(), fcntl.LOCK_EX)
            # Truncate only after the lock is held so a concurrent holder's
            # content is not wiped underneath it.
            self.f.seek(0)
            self.f.truncate()
        except BaseException:
            # Do not leak the descriptor if locking/truncating fails or is
            # interrupted.
            self.f.close()
            raise

    def write(self, b):
        """Write the bytes ``b`` and flush; raise ``OSError`` on a short write."""
        n = self.f.write(b)
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if n != len(b):
            raise OSError('short write: %d of %d bytes' % (n, len(b)))
        self.f.flush()

    def close(self):
        """Close the file, which also releases the lock."""
        self.f.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()
def list_select(title, msg, lst):
    """Ask the user on the console to choose one item of ``lst``.

    ``msg`` is printed as a heading followed by a numbered menu built from
    element ``[1]`` of every item.  ``title`` is accepted for signature
    parity with the dialog-based variant and is not used here.

    Returns the chosen item.  Returns ``None`` when standard input reaches
    end-of-file, matching the dialog-based variant's "nothing chosen"
    result instead of letting ``EOFError`` escape.
    """
    print(msg + ':')
    for number, item in enumerate(lst, start=1):
        print(' %d. %s' % (number, item[1]))
    while True:
        print('请输入编号: ', end='', flush=True)
        try:
            sel = int(input())
        except ValueError:
            sel = 0  # not a number: treated like any out-of-range choice
        except EOFError:
            print()
            return None
        if 1 <= sel <= len(lst):
            return lst[sel - 1]
        print('无效输入')
151 |
# Never let an unencodable character abort console output: replace it instead.
sys.stdout.reconfigure(errors='replace')
sys.stderr.reconfigure(errors='replace')

# Directory that holds config.txt and the backend log files.
# NOTE(review): sys.frozen / sys._MEIPASS look like the PyInstaller bundle
# markers -- confirm the packaging tool.
if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
    bundle_dir = os.path.abspath(sys._MEIPASS)
else:
    bundle_dir = os.path.dirname(os.path.abspath(__file__))

# Semicolon-separated wildcard list used as the "video files" filter of the
# file-open dialog.
EXTENSIONS_VIDEO = "*.3g2;*.3gp;*.3gp2;*.3gpp;*.amv;*.asf;*.avi;*.bik;*.bin;*.crf;*.dav;*.divx;*.drc;*.dv;*.dvr-ms;*.evo;*.f4v;*.flv;*.gvi;*.gxf;*.iso;*.m1v;*.m2v;" \
    "*.m2t;*.m2ts;*.m4v;*.mkv;*.mov;*.mp2;*.mp2v;*.mp4;*.mp4v;*.mpe;*.mpeg;*.mpeg1;" \
    "*.mpeg2;*.mpeg4;*.mpg;*.mpv2;*.mts;*.mtv;*.mxf;*.mxg;*.nsv;*.nuv;" \
    "*.ogg;*.ogm;*.ogv;*.ogx;*.ps;" \
    "*.rec;*.rm;*.rmvb;*.rpl;*.thp;*.tod;*.tp;*.ts;*.tts;*.txd;*.vob;*.vro;*.webm;*.wm;*.wmv;*.wtv;*.xesc"
165 |
class BackendDiedError(Exception):
    """Raised by check_backend() when the backend process has exited."""
    pass
168 |
# Exit-time flags: whether to wait for a key press, and whether a fatal
# error occurred.
waitkey = False
fail = False

# config.txt is a Python literal; utf_8_sig tolerates a leading BOM.
# The file is closed as soon as it has been read.
with open(os.path.join(bundle_dir, 'config.txt'), 'r', encoding='utf_8_sig') as _config_file:
    gconfig = ast.literal_eval(_config_file.read())

# State shared by the helper functions below.
url = None          # base URL of the running backend
backend = None      # backend subprocess
frontend = None     # frontend subprocess
session = None      # session token sent with API requests
lastwatch = 0       # id of the last log row already printed by watch()
video = None        # base name of the file being processed
backendlog = None   # open log file receiving backend output in silent mode
backendlog_file = os.path.join(bundle_dir, 'backendlog_%s_%d.txt' % (datetime.now().strftime('%Y%m%d_%H%M%S'), os.getpid()))
backendlog_preserve = False  # keep the log file even when no fatal error occurred
182 |
def alloc_hostport():
    """Return a ``(host, port)`` pair that could be bound just now.

    The host is ``gconfig['host']``; candidate ports come from the
    half-open range ``gconfig['port']`` and are tried in random order.
    The probing socket is closed again before returning, so the port is
    only known to have been free at probe time.

    Raises:
        Exception: if no port of the configured range can be bound.
    """
    host = gconfig['host']
    candidates = list(range(*gconfig['port']))
    # One shuffle (OS randomness, as before) replaces rebuilding the
    # candidate list after every failed attempt.
    secrets.SystemRandom().shuffle(candidates)
    for port in candidates:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind((host, port))
            except (OSError, OverflowError):
                # Port in use / not bindable / outside 0-65535: try the next.
                continue
            return (host, port)
    raise Exception('无可用后端通信端口')
196 |
def exe(path):
    """Return the argv prefix (a list) needed to launch ``path``.

    On Windows a ``.py`` script becomes the sibling ``.exe`` when running
    from a frozen bundle, otherwise it is run with the current Python
    interpreter; any other path is returned unchanged.  Elsewhere a
    relative path is prefixed with ``./`` so it is not looked up on PATH,
    and an absolute path is returned unchanged.
    """
    if not windows:
        if os.path.isabs(path):
            return [path]
        return [os.path.join('.', path)]
    root, ext = os.path.splitext(path)
    if ext != '.py':
        return [root + ext]
    if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
        return [root + '.exe']
    # Use the interpreter that runs this launcher rather than whatever
    # "python" happens to be first on PATH (which may be missing or lack
    # the required packages); fall back to "python" if it is unknown.
    return [sys.executable or 'python', path]
211 |
def run_backend(file, silent=False, nosession=False):
    """Start the backend for ``file`` and block until it reports "loaded".

    Args:
        file: path handed to video2sub.py as its last argument.
        silent: when true, backend stdout/stderr are appended to
            ``backendlog_file`` instead of being inherited.
        nosession: when true, no session is requested from the backend.

    Raises:
        Exception: if the pid reported by the backend differs from the
            pid of the process that was just started.
    """
    global backend
    global url
    global video
    global session
    global lastwatch
    global backendlog
    hostport = alloc_hostport()
    cmdline = exe('video2sub.py') + [hostport[0], str(hostport[1]), file]
    if silent:
        if backendlog is not None:
            backendlog.close()
        # Write a ">>>>>" header line (timestamp + command line) in text
        # mode first, then reopen in binary append mode for the subprocess.
        with open(backendlog_file, 'a', errors='replace') as f:
            f.write('>>>>> [%s] >>>>> %s\n' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), json.dumps(cmdline, ensure_ascii=False)))
        backendlog = open(backendlog_file, 'ab')
    backend = subprocess.Popen(cmdline, stdout=backendlog if silent else None, stderr=subprocess.STDOUT if silent else None)
    video = os.path.basename(file)
    url = 'http://%s:%d' % hostport
    # Reset per-backend state before the first api() call, which reads
    # url and session.
    session = None
    lastwatch = 0
    # Make sure the server answering on this port is the process just started.
    if api('/getpid') != backend.pid:
        raise Exception("pid mismatch")
    if not nosession:
        session = api('/session')
    # Poll with a doubling delay capped at one second, echoing new log rows.
    waitsec = 0.1
    while not api('/state')['loaded']:
        watch()
        time.sleep(waitsec)
        waitsec = min(waitsec * 2, 1)
    watch()
242 |
def check_backend():
    """Raise BackendDiedError if the backend process has already exited."""
    still_running = backend.poll() is None
    if not still_running:
        raise BackendDiedError()
246 |
def run_frontend():
    """Launch the desktop frontend pointed at the running backend.

    The frontend configuration (``nativefier.json``) is generated from
    its ``-template`` sibling with ``targetUrl`` set to the backend URL,
    then the application is started.  The configuration file stays locked
    until the backend reports that a session exists -- presumably so that
    another launcher instance cannot rewrite it before this frontend has
    read it.  The lock is released even if launching or polling fails.
    """
    global frontend
    apppath = 'APP-win32-x64' if windows else 'APP-linux-x64'
    appexe = 'APP'
    appconf = os.path.join(apppath, 'resources', 'app', 'nativefier.json')
    with open(appconf + '-template', 'r', encoding='utf-8') as f:
        conf = json.load(f)
    conf['targetUrl'] = url
    w = LockedFileWriter(appconf)
    try:
        w.write(json.dumps(conf, ensure_ascii=False, separators=(',', ':')).encode())
        frontend = subprocess.Popen(exe(os.path.join(apppath, appexe)) + ['--disable-smooth-scrolling'], close_fds=True)
        # Poll with a doubling delay capped at one second.
        waitsec = 0.1
        while not api('/havesession'):
            time.sleep(waitsec)
            waitsec = min(waitsec * 2, 1)
    finally:
        # Previously skipped when Popen()/api() raised, leaving the file
        # locked and the handle open.
        w.close()
263 |
def api(path, data=b'', retry=True):
    """Call the backend endpoint ``url + path`` and return its reply.

    Args:
        path: endpoint path appended to the global backend ``url``.
        data: request body.  Bytes-like objects are sent as-is, ``None``
            sends no body, anything else is JSON-encoded and sent with a
            JSON content type.
        retry: when true, keep retrying every 0.1 s on transport errors
            for as long as the backend process is alive; when false,
            return ``None`` after the first such error.

    Returns:
        The decoded JSON value, or the raw response bytes when the body
        is not valid UTF-8 JSON.

    Raises:
        urllib.error.HTTPError: HTTP error statuses are never retried.
        BackendDiedError: raised by check_backend() if the backend exited.
        TypeError / ValueError: if ``data`` cannot be JSON-encoded.
    """
    # Encode once, outside the retry loop: a payload that cannot be
    # serialised used to be swallowed by the generic handler below and
    # retried forever.
    body = data
    as_json = data is not None and not isinstance(data, (bytes, bytearray, memoryview))
    if as_json:
        body = json.dumps(data).encode('utf-8')
    while True:
        try:
            req = urllib.request.Request(url + path)
            if session is not None:
                req.add_header('X-VIDEO2SUB-SESSION', session)
            if as_json:
                req.add_header('Content-Type', 'application/json')
            req.data = body
            # Close the response deterministically once it has been read.
            with urllib.request.urlopen(req) as rsp:
                raw = rsp.read()
        except urllib.error.HTTPError:
            raise
        except Exception:
            # Typically the backend is not listening yet; give up only if
            # the process itself is gone.
            check_backend()
            if not retry:
                return None
            time.sleep(0.1)
            continue
        try:
            return json.loads(raw.decode('utf-8'))
        except ValueError:
            # Not JSON (e.g. a plain b'ok' reply): hand back the raw bytes.
            return raw
287 |
def watch():
    """Print backend log rows of the current session not printed before.

    Fetches ``/logs``, pairs every row with the column names, and prints
    rows flagged ``cursession`` whose ``id`` is above ``lastwatch``;
    ``lastwatch`` is then moved to the id of each printed row.
    """
    global lastwatch
    logs = api('/logs')
    for values in logs['row']:
        entry = dict(zip(logs['col'], values))
        if not entry['cursession'] or entry['id'] <= lastwatch:
            continue
        print(video, '#', entry['date'], '#', entry['level'], entry['message'])
        lastwatch = entry['id']
296 |
297 | try:
298 | files = sys.argv[1:]
299 | if windows and len(files) == 0:
300 | print('请从弹出的窗口中选择要处理的文件')
301 | print(' 若只指定一个文件,则使用图形界面模式')
302 | print(' 若指定多个文件,则使用批处理模式')
303 | print()
304 | files = file_select('请选择要处理的文件', '视频文件 (%s)\0%s\0video2sub 数据库文件 (*.v2s)\0*.v2s\0所有文件 (*.*)\0*.*\0'%(EXTENSIONS_VIDEO, EXTENSIONS_VIDEO))
305 | files = [os.path.abspath(p) for p in files]
306 | os.chdir(bundle_dir)
307 | if len(files) == 0:
308 | print('使用方法:')
309 | print(' %s [视频/数据库文件...]'%os.path.basename(sys.argv[0]))
310 | print('若只指定一个文件,则使用图形界面模式')
311 | print('若指定多个文件,则使用批处理模式')
312 | elif len(files) == 1:
313 | print('图形界面模式')
314 | print('=====')
315 | run_backend(files[0], nosession=True)
316 | run_frontend()
317 | frontend.wait()
318 | elif len(files) > 1:
319 | print('批处理模式')
320 | print('=====')
321 | print('文件列表:\n '+'\n '.join(map(os.path.basename, files)))
322 | print('=====')
323 | op = list_select('批处理模式', '已选择%d个文件,请选择要执行的操作'%len(files), [
324 | ('ocr', '批量OCR'),
325 | ('exportass', '批量导出ASS'),
326 | ('exportcsv', '批量导出CSV'),
327 | ('gui', '同时打开多个图形界面进行操作'),
328 | ])
329 | if op is not None:
330 | print('操作:', op[1])
331 | op = op[0]
332 | else:
333 | print('没有选择操作')
334 | print('=====')
335 | start_time = time.time()
336 | success = 0
337 | if op is None:
338 | success = -1
339 | elif op == 'ocr':
340 | print('正从第一个文件“%s”中读取OCR设置'%os.path.basename(files[0]))
341 | run_backend(files[0], silent=True)
342 | ocrconfig = api('/loadconfig', {'key':'OCR'})
343 | backend.kill()
344 | backend.wait()
345 | print('将要使用的OCR设置:', ocrconfig)
346 | print('=====')
347 | for file in files:
348 | print('正处理:', os.path.basename(file))
349 | run_backend(file, silent=True)
350 | file_start_time = time.time()
351 | for i in range(9999999):
352 | api('/saveconfig', {'key':'OCR', 'value':ocrconfig, 'msg':'设定批处理OCR设置: '+str(ocrconfig)})
353 | state = api('/state')
354 | if state['lastjob'] > 0 or state['curarea'] == 0:
355 | if i > 10:
356 | print('迭代次数过多,已放弃')
357 | break
358 | if i > 0:
359 | print('迭代(第%d次)'%i)
360 |
361 | if state['lastjob'] > 0:
362 | print('数据库中有未处理完毕的项目,执行“继续OCR”操作')
363 | ret = api('/continueocr', {'item_range': None, 'restarttype': ''})
364 | elif state['curarea'] == 0:
365 | print('数据库中无该OCR区域的项目,执行“新OCR”操作')
366 | ret = api('/startocr', {'frame_range': None})
367 | else:
368 | assert False
369 |
370 | if ret == b'ok':
371 | while api('/state')['ocractive']:
372 | watch()
373 | time.sleep(1)
374 | else:
375 | watch()
376 | print('无法启动OCR任务,请检查第一个文件的OCR设置是否正确')
377 | break
378 | watch()
379 | else:
380 | print('没有要做的任务' if i == 0 else '任务已完成')
381 | success += 1
382 | break
383 | backend.kill()
384 | backend.wait()
385 | print('耗时%.2f秒'%(time.time() - file_start_time))
386 | print('=====')
387 | elif op.startswith('export'):
388 | for file in files:
389 | print('正处理:', os.path.basename(file))
390 | run_backend(file, silent=True)
391 | if api('/' + op) == b'ok':
392 | success += 1
393 | watch()
394 | backend.kill()
395 | backend.wait()
396 | print('=====')
397 | elif op == 'gui':
398 | success = -1
399 | children = []
400 | waiters = []
401 | def do_wait(i):
402 | children[i].wait()
403 | print('已退出:', os.path.basename(files[i]))
404 | def do_join():
405 | for waiter in waiters:
406 | waiter.join()
407 | try:
408 | for i, file in enumerate(files):
409 | if i > 0:
410 | time.sleep(1)
411 | print('正打开:', os.path.basename(file))
412 | children.append(subprocess.Popen(exe('launcher.py') + [file], close_fds=True))
413 | waiters.append(Thread(target=do_wait, args=(i,)))
414 | waiters[i].start()
415 | if windows:
416 | joiner = Thread(target=do_join)
417 | joiner.start()
418 | while joiner.is_alive(): # avoid join() which can't be interrupted
419 | time.sleep(1)
420 | else:
421 | do_join()
422 | except:
423 | traceback.print_exc()
424 | if not windows:
425 | for child in children:
426 | child.send_signal(signal.SIGINT)
427 | do_join()
428 | else:
429 | assert False
430 | if success >= 0:
431 | print('共%d个文件,成功%d个,失败%d个,耗时%.2f秒'%(len(files), success, len(files)-success, time.time()-start_time))
432 | if success != len(files):
433 | backendlog_preserve = True
434 | if windows:
435 | waitkey = True
436 | except KeyboardInterrupt:
437 | print('手动中断')
438 | except Exception as e:
439 | if isinstance(e, BackendDiedError):
440 | print('(后端异常退出)')
441 | else:
442 | print('===== 异常 =====')
443 | traceback.print_exc()
444 | if backendlog is not None:
445 | backendlog.close()
446 | with open(backendlog_file, 'r', errors='replace') as f:
447 | lines = f.readlines()
448 | for i in range(len(lines)):
449 | if lines[-i - 1].startswith('>>>>>'):
450 | lines = lines[-i:] if i > 0 else []
451 | break
452 | maxshow = 50
453 | if len(lines) > maxshow:
454 | print('===== 后端日志的最后 %d 行 ====='%maxshow)
455 | lines = lines[-maxshow:]
456 | else:
457 | print('===== 后端日志 =====')
458 | print(''.join(lines), end='')
459 | fail = True
460 | except:
461 | traceback.print_exc()
462 | fail = True
463 |
# --- Shutdown: stop child processes, settle the log file, report, exit ---

# Backend first, then frontend; each is killed and reaped if it was started.
for child in (backend, frontend):
    if child:
        child.kill()
        child.wait()

if backendlog is not None:
    backendlog.close()
    # The log is kept after a fatal error, when preservation was requested,
    # or when there is no file to delete.
    keep_log = fail or backendlog_preserve or not os.path.exists(backendlog_file)
    if keep_log:
        print('(后端日志已存储为: %s)'%backendlog_file)
    else:
        os.remove(backendlog_file)

exitmsg = []
exitcode = 1 if fail else 0
if fail:
    print('=====')
    exitmsg.append('遇到致命错误')
    if windows:
        # Keep the console window open so the error can be read.
        waitkey = True
if waitkey:
    exitmsg.append('按任意键退出程序' if windows else '按回车退出程序')
if exitmsg:
    print(','.join(exitmsg))
if waitkey:
    if windows:
        msvcrt.getch()
    else:
        input()
sys.exit(exitcode)
499 |
--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhangboyang/video2sub/5964e919d3e024cfb4272fa3b90c4943adeb85dc/screenshot.png
--------------------------------------------------------------------------------
/ui/ui.css:
--------------------------------------------------------------------------------
1 | html, body {
2 | margin: 0;
3 | padding: 0;
4 | width: 100%;
5 | height: 100%;
6 | font-size: 16px;
7 | line-height: 20px;
8 | background-color: white;
9 | font-family: Tahoma, '宋体', SimSun, sans-serif;
10 | overflow-x: hidden;
11 | overflow-y: auto;
12 | }
13 | input, select {
14 | font-family: Tahoma, '宋体', SimSun, sans-serif;
15 | font-size: 14px;
16 | }
17 | input:focus, select:focus {
18 | outline: none;
19 | }
20 | a {
21 | color: blue;
22 | text-decoration: underline;
23 | cursor: pointer;
24 | user-select: none;
25 | }
26 | a.button {
27 | display: inline-block;
28 | height: 20px;
29 | vertical-align: middle;
30 | position: relative;
31 | background-color: white;
32 | border: 1px solid gray;
33 | padding: 2px 3px;
34 | text-align: center;
35 | }
36 | a.button:hover {
37 | border-color: black;
38 | }
39 | a.button > div {
40 | display: none;
41 | }
42 | a.button:hover > div {
43 | text-align: left;
44 | position: absolute;
45 | width: max-content;
46 | display: inline-block;
47 | background-color: white;
48 | border: 1px solid black;
49 | z-index: 100;
50 | padding: 2px;
51 | color: black;
52 | pointer-events: none;
53 | }
54 | a.button:hover > div.bl { /* bottom-left tooltip */
55 | top: 26px;
56 | left: -1px;
57 | }
58 | a.button:hover > div.br { /* bottom-right tooltip*/
59 | top: 26px;
60 | right: -1px;
61 | }
62 | a.button:hover > div.tr { /* top-right tooltip*/
63 | bottom: 26px;
64 | right: -1px;
65 | }
66 | [v-cloak] {
67 | display: none;
68 | }
69 | #app {
70 | width: 100%;
71 | height: 100%;
72 | position: relative;
73 | }
74 | #loading {
75 | position: absolute;
76 | left: 0;
77 | top: 0;
78 | height: 100%;
79 | width: 100%;
80 | background-color: white;
81 | padding: 20px;
82 | box-sizing: border-box;
83 | }
84 | #apparea {
85 | width: 100%;
86 | height: 100%;
87 | display: flex;
88 | flex-direction: column;
89 | }
90 | #apparea > div {
91 | position: relative;
92 | }
93 | #apparea > div:not(:first-child) {
94 | border-top: 1px black solid;
95 | }
96 | .dialogbackground {
97 | position: absolute;
98 | left: 0;
99 | top: 0;
100 | width: 100%;
101 | height: 100%;
102 | background-color: rgba(0, 0, 0, 0.5);
103 | }
104 | #title {
105 | text-align: center;
106 | }
107 |
108 | div.imgdb {
109 | position: relative;
110 | overflow: hidden;
111 | }
112 | div.imgdb > img {
113 | position: absolute;
114 | }
115 | #video.editorup {
116 | flex-direction: column-reverse;
117 | }
118 | #video {
119 | display: flex;
120 | user-select: none;
121 | flex-direction: column;
122 | }
123 | #framearea {
124 | display: flex;
125 | margin-bottom: 5px;
126 | }
127 | #framebox {
128 | display: flex;
129 | flex-direction: column;
130 | margin: 0 10px;
131 | }
132 | .editorup #framebox {
133 | flex-direction: column-reverse;
134 | }
135 | #frametitle {
136 | border: 1px solid black;
137 | border-bottom: 0;
138 | }
139 | .editorup #frametitle {
140 | border: 1px solid black;
141 | border-bottom: 1px solid black;
142 | border-top: 0;
143 | }
144 | #frame {
145 | border: 1px solid black;
146 | position: relative;
147 | background-color: cadetblue;
148 | }
149 | #frame > #frameevent {
150 | left: 0;
151 | top: 0;
152 | position: absolute;
153 | width: 100%;
154 | height: 100%;
155 | }
156 | #frame > img {
157 | width: 100%;
158 | height: 100%;
159 | }
160 | #frameleft {
161 | flex: 1;
162 | overflow-wrap: anywhere;
163 | writing-mode: vertical-rl;
164 | }
165 | #frameright {
166 | flex: 1;
167 | overflow-wrap: anywhere;
168 | writing-mode: vertical-lr;
169 | }
170 | #frameleft > *, #frameright > * {
171 | writing-mode: horizontal-tb;
172 | margin: 0 5px;
173 | }
174 | div.toolbox {
175 | border: 1px solid gray;
176 | }
177 | div.toolbox > strong {
178 | box-sizing: border-box;
179 | display: block;
180 | width: 100%;
181 | padding: 2px 5px;
182 | text-align: center;
183 | }
184 | div.toolbox > div:not(:first-child) {
185 | border-top: 1px solid gray;
186 | }
187 | div.toolbox > div.text {
188 | padding: 2px 5px;
189 | }
190 |
191 | #frame > #ocrarea {
192 | width: calc(100% + 2px);
193 | position: absolute;
194 | border-style: solid;
195 | border-width: 0px 9px;
196 | left: -10px;
197 | }
198 | .neighborframe {
199 | float: right;
200 | display: flex;
201 | flex-direction: column;
202 | }
203 | .neighborframe > .imgdb {
204 | cursor: pointer;
205 | }
206 | .editorup .neighborframe {
207 | flex-direction: column-reverse;
208 | }
209 | .editorup div.neighborframe.toolbox > div:not(:first-child) {
210 | border-top: 0;
211 | border-bottom: 1px solid gray;
212 | }
213 |
214 | #dispsize, #jumpframe, #ocrinfo {
215 | margin-top: 10px;
216 | float: left;
217 | }
218 | #jumpframe td {
219 | text-align: right;
220 | cursor: pointer;
221 | color: blue;
222 | text-decoration: underline;
223 | }
224 | #ocrinfo {
225 | max-width: 140px;
226 | }
227 |
228 | #editor {
229 | margin-bottom: 5px;
230 | display: flex;
231 | }
232 | #editor > div {
233 | flex: 1;
234 | margin: 0 5px;
235 | position: relative;
236 | }
237 | #editor > div:first-child > div {
238 | right: 0;
239 | }
240 | #editor > div > div {
241 | min-width: max-content;
242 | position: absolute;
243 | bottom: 0;
244 | }
245 | #editor > input {
246 | display: block;
247 | margin: 0 auto;
248 | padding: 2px;
249 | text-align: center;
250 | border: 1px solid black;
251 | font-family: Arial, '黑体', SimHei;
252 | }
253 | #editor > input:read-only {
254 | background-color: lightgray;
255 | }
256 |
257 | #thumbnail {
258 | position: absolute;
259 | border: 1px solid #00ff00;
260 | bottom: 0;
261 | }
262 | #timebar {
263 | background-color: magenta;
264 | height: 30px;
265 | min-height: 30px;
266 | user-select: none;
267 | overflow: hidden;
268 | }
269 | #timebar > div {
270 | position: absolute;
271 | left: 0;
272 | top: 0;
273 | height: 100%;
274 | }
275 | #timebar > div#barbg {
276 | width: 100%;
277 | }
278 | #timebar > div#posmarker, #timebar > div#mousemarker {
279 | width: 0;
280 | height: 0;
281 | margin: 0 -5px;
282 | border-style: solid;
283 | border-width: 0 5px 10px 5px;
284 | border-color: transparent transparent #00ff00 transparent;
285 | z-index: 2;
286 | }
287 | #timebar > #mouselabel {
288 | margin-left: 10px;
289 | color: black;
290 | z-index: 2;
291 | }
292 | #timebar > canvas {
293 | height: 100%;
294 | position: absolute;
295 | left: 0;
296 | top: 0;
297 | z-index: 1;
298 | }
299 | #thumbbar {
300 | background-color:lightgray;
301 | }
302 |
303 |
304 | #op input[type='text'] {
305 | width: 70px;
306 | height: 26px;
307 | border: 1px solid black;
308 | box-sizing: border-box;
309 | vertical-align: middle;
310 | }
311 | #op input[type='file'] {
312 | display: none;
313 | }
314 | #op select {
315 | width: 150px;
316 | height: 26px;
317 | box-sizing: border-box;
318 | border: 1px solid black;
319 | vertical-align: middle;
320 | background-color: greenyellow;
321 | }
322 | #op {
323 | background-color: #e8ffcd;
324 | user-select: none;
325 | display: flex;
326 | }
327 | #op > div:nth-child(2) {
328 | flex: 1
329 | }
330 | #op > div > div {
331 | display: block;
332 | padding: 2px 10px;
333 | height: 26px;
334 | }
335 | #op > div > div > span {
336 | line-height: 26px;
337 | display: inline-block;
338 | vertical-align: top;
339 | }
340 | #op > div:not(:first-child) > div {
341 | border-left: 1px solid black;
342 | }
343 | #op > div > div:not(:first-child) {
344 | border-top: 1px solid black;
345 | }
346 | #op #export {
347 | background-color: white;
348 | }
349 | #op #export a.button {
350 | color:rgb(0, 80, 0);
351 | }
352 |
353 | #codeeditor {
354 | border: 1px solid black;
355 | background-color: white;
356 | width: 80%;
357 | height: 80%;
358 | position: absolute;
359 | top: 10%;
360 | left: 10%;
361 | z-index: 20;
362 | display: flex;
363 | flex-direction: column;
364 | user-select: none;
365 | }
366 | #codeeditor > div:not(:first-child) {
367 | border-top: 1px solid black;
368 | }
369 | #codeeditor > #codeeditortitle {
370 | text-align: center;
371 | line-height: 24px;
372 | position: relative;
373 | }
374 | #codeeditor > #codeeditortitle > a {
375 | position: absolute;
376 | width: 24px;
377 | border-left: 1px solid black;
378 | right: 0;
379 | top: 0;
380 | }
381 | #codeeditor > #editordiv {
382 | flex: 1;
383 | overflow: hidden;
384 | }
385 | #codeeditor > #scriptselect {
386 | padding: 2px 5px;
387 | }
388 | #codeeditor > #scriptselect > select {
389 | border: 1px solid black;
390 | background-color: azure;
391 | height: 26px;
392 | width: 300px;
393 | }
394 | #codeeditor > #editordiv > .CodeMirror {
395 | height: 100%;
396 | width: 100%;
397 | }
398 | #codeeditor > #codeeditorop {
399 | padding: 2px 5px;
400 | }
401 | #codeeditor > #codeeditorop > .button {
402 | float: right;
403 | min-width: 60px;
404 | min-height: 26px;
405 | line-height: 26px;
406 | margin-right: 10px;
407 | }
408 |
409 | table {
410 | border-collapse: collapse;
411 | }
412 | table td, table th {
413 | border: 1px gray solid;
414 | padding: 2px 5px;
415 | vertical-align: top;
416 | }
417 | table tr:first-child th, table tr:first-child td {
418 | border-top: 0;
419 | }
420 | table td:first-child, table th:first-child {
421 | border-left: 0;
422 | }
423 | table td:last-child, table th:last-child {
424 | border-right: 0;
425 | }
426 | table tr:last-child td {
427 | border-bottom: 0;
428 | }
429 |
430 | #tblarea {
431 | flex: 1;
432 | display: flex;
433 | flex-direction: column;
434 | min-height: 5px;
435 | }
436 | #tblsummary {
437 | width: 350px;
438 | text-align: center;
439 | }
440 | #tblbottom {
441 | border-top: 1px solid gray;
442 | flex: 1;
443 | color: gray;
444 | user-select: none;
445 | }
446 | #tblbottom > div {
447 | padding: 2px 10px;
448 | }
449 | #tblnoresult {
450 | text-align: center;
451 | }
452 | #tblhint > span {
453 | color: rgb(64, 128, 64);
454 | }
455 | #tblop {
456 | display: flex;
457 | border-bottom: 1px solid black;
458 | user-select: none;
459 | }
460 | #tblop > div {
461 | padding: 2px 4px;
462 | }
463 | #tblop > div:not(:first-child) {
464 | border-left: 1px solid black;
465 | }
466 | #tblop > div > span {
467 | line-height: 24px;
468 | vertical-align: top;
469 | display: inline-block;
470 | }
471 | #tblop select {
472 | border: 1px solid black;
473 | background-color: azure;
474 | padding: 2px;
475 | width: 100px;
476 | }
477 | #tblfltcustom {
478 | flex: 1;
479 | margin: 2px;
480 | padding: 2px;
481 | border: 1px solid black;
482 | font-family: monospace;
483 | }
484 | #text {
485 | flex: 1;
486 | overflow-y: scroll;
487 | display: flex;
488 | flex-direction: column;
489 | }
490 | #text > table {
491 | width: 100%;
492 | }
493 | #text > table th {
494 | white-space: nowrap;
495 | }
496 | #text > table td:not(:nth-child(4)) {
497 | white-space: nowrap;
498 | }
499 | #text > table td:nth-child(4) {
500 | width: 100%;
501 | white-space: pre-wrap;
502 | }
503 | #text > table tr {
504 | scroll-margin-bottom: 30px;
505 | }
506 | #text > table tr[data-state='waitocr'] {
507 | background-color: lightgray;
508 | }
509 | #text > table tr[data-state='error'] {
510 | background-color: lightpink;
511 | }
512 | #text > table tr[data-state='merged'] {
513 | background-color: lightgray;
514 | }
515 | #text > table tbody tr:hover {
516 | background-color: lemonchiffon !important;
517 | }
518 | #text > table tr td:nth-child(1) {
519 | color: gray;
520 | }
521 | #text > table tr[data-tag~='primary'] td:nth-child(1) {
522 | color: black;
523 | }
524 | #text > table tr[data-state='done'][data-tag~='sel'] {
525 | background-color: rgb(130, 215, 255);
526 | }
527 | #text > table tr[data-state='done'][data-tag~='cur'] {
528 | background-color: yellowgreen;
529 | }
530 | #text > table tr[data-tag~='cur'] td:nth-child(2) {
531 | background-color: yellowgreen;
532 | }
533 | #text > table tr[data-tag~='cur'] td:first-child {
534 | background-color: yellowgreen;
535 | }
536 | #text > table tr[data-tag~='sel'] div.img {
537 | outline: 2px solid blue !important;
538 | }
539 | #text > table tr[data-tag~='sel'] td:first-child {
540 | background-color: rgb(130, 215, 255) !important;
541 | }
542 | #text > table tr[data-lock='locked'] {
543 | text-decoration: line-through;
544 | }
545 | #text > table tr:not([data-position="2"]) td:nth-child(2) {
546 | color: gray;
547 | }
548 | #text > table tr[data-empty='empty'] td:nth-child(4) {
549 | color: gray;
550 | }
551 |
552 | #text > table tr:hover td:nth-child(4) {
553 | text-decoration: underline;
554 | }
555 | #text > table tr div.img {
556 | outline: 1px solid white;
557 | }
558 | #text > table tr:hover div.img {
559 | outline: 1px dashed red;
560 | }
561 |
562 |
563 | #logarea {
564 | overflow: hidden;
565 | }
566 | #logarea > div.buttonbar {
567 | display: inline-block;
568 | position: absolute;
569 | right: 32px;
570 | bottom: 5px;
571 | text-align: right;
572 | }
573 | #logarea > div.buttonbar .button {
574 | margin: 2px 0;
575 | }
576 | #logs {
577 | width: 100%;
578 | height: 100%;
579 | overflow-y: scroll;
580 | }
581 | #logs > table {
582 | width: 100%;
583 | }
584 | #logs > table tr[data-level='I'] {
585 | background-color: azure;
586 | }
587 | #logs > table tr[data-level='E'] {
588 | background-color: lightpink;
589 | }
590 | #logs > table tr[data-level='S'] {
591 | background-color: lightgreen;
592 | }
593 | #logs > table tr[data-level='W'] {
594 | background-color: lightyellow;
595 | }
596 | #logs > table tr[data-level='C'] {
597 | background-color: gold;
598 | }
599 | #logs > table td:not(:last-child) {
600 | white-space: nowrap;
601 | }
602 | #logs > table td:last-child {
603 | width: 100%;
604 | white-space: pre-wrap;
605 | }
606 |
607 | #promptbg {
608 | display: flex;
609 | align-items: center;
610 | justify-content: center;
611 | z-index: 20;
612 | }
613 | #promptdialog {
614 | border: 1px solid black;
615 | background-color: white;
616 | width: 350px;
617 | height: 160px;
618 | position: absolute;
619 | margin: auto;
620 | z-index: 20;
621 | display: flex;
622 | flex-direction: column;
623 | user-select: none;
624 | padding: 10px;
625 | }
626 | #promptdialog > div:nth-child(1) {
627 | padding: 10px;
628 | white-space: pre-wrap;
629 | }
630 | #promptdialog > div:nth-child(2) {
631 | flex: 1;
632 | }
633 | #promptdialog > div:nth-child(2) > input {
634 | width: 90%;
635 | margin: auto;
636 | display: block;
637 | border: 1px solid black;
638 | padding: 5px;
639 | font-size: 16px;
640 | }
641 | #promptdialog > div:nth-child(3) {
642 | text-align: right;
643 | }
644 | #promptdialog > div:nth-child(3) > a.button {
645 | padding: 10px 20px;
646 | }
647 |
648 | #adveditorbg {
649 | display: flex;
650 | align-items: center;
651 | justify-content: center;
652 | z-index: 20;
653 | }
654 | #adveditor {
655 | border: 1px solid black;
656 | background-color: white;
657 | width: 600px;
658 | height: 600px;
659 | position: absolute;
660 | margin: auto;
661 | z-index: 20;
662 | display: flex;
663 | flex-direction: column;
664 | user-select: none;
665 | padding: 10px;
666 | }
667 | #adveditor > div:nth-child(1) {
668 | flex: 1;
669 | margin-bottom: 10px;
670 | overflow: hidden;
671 | border: 1px solid gray;
672 | }
673 | #adveditor > div:nth-child(1) > .CodeMirror {
674 | height: 100%;
675 | width: 100%;
676 | }
677 | #adveditor > div:nth-child(2) {
678 | text-align: right;
679 | }
680 | #adveditor > div:nth-child(2) > a.button {
681 | padding: 10px 20px;
682 | }
--------------------------------------------------------------------------------
/ui/ui.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | 硬字幕OCR提取工具
11 |
12 |
13 |
14 |
15 |
{{info.file}} ({{info.width}}x{{info.height}}, {{info.nframes}}帧, {{info.fps}}fps)
16 |
17 |
18 |
48 |
49 |
当前帧 ({{pos}})
50 |
51 |
![]()
52 |
53 |
54 |
55 |
56 |
57 |
78 |
79 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
{{mousepos}}
109 |
110 |
111 |
112 |
{{tblinfo}}{{tblinfo2}}
113 |
114 | 排序
115 |
118 |
119 |
120 | 筛选
121 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 | 帧范围 |
132 | OCR区域 |
133 | 引擎 |
134 | 文本 |
135 | 预览 |
136 |
137 |
138 |
139 |
140 |
141 |
(无结果)
142 |
单击:选择 / Ctrl+单击:多选 / Shift+单击:选择范围
143 |
144 |
145 |
146 |
147 |
148 |
149 |
选中:{{selinfo}}{{selinfo2}}
150 |
154 |
155 |
164 |
165 |
190 |
191 |
208 |
209 |
210 |
211 |
215 |
216 |
219 |
重命名
220 |
克隆
221 |
删除
222 |
新建
223 |
该脚本已锁定,不可更改
224 |
提示:按 Ctrl+Shift+I 可打开控制台。
225 |
226 |
227 |
231 |
232 |
241 |
242 |
243 |
{{myprompt.message}}
244 |
245 |
249 |
250 |
251 |
加载中,请稍候…
若数据较多,加载速度可能会比较慢
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/ui/ui.js:
--------------------------------------------------------------------------------
1 | "use strict";
2 |
3 | // use 'var' instead of 'let' for global variables
4 | // or compile_expr() can't capture them!!!
5 |
// Alias for eval. Calling eval through an alias makes it an indirect eval,
// so the generated code is evaluated in the global scope rather than in the
// scope of compile_expr().
var geval = eval;

// Compile the expression source `expr` into a function taking the parameters
// named in `param`.
//
// gvars: names of window properties the expression is allowed to see.
// param: array of parameter names for the generated function.
// expr:  JavaScript expression source; its value is the function's result.
//
// Every other enumerable window property is shadowed by an uninitialised
// `let` declaration inside a strict-mode wrapper, so it reads as undefined
// from inside the expression.
//
// Throws whatever eval throws (e.g. SyntaxError) when `expr` does not parse.
//
// NOTE(review): this evaluates caller-supplied source with eval. The
// shadowing only hides enumerable globals; it is not a sandbox.
function compile_expr(gvars, param, expr) {
    /* NOT SAFE BUT EFFECTIVE */
    // window may expose enumerable keys that are not identifiers (for
    // example indexed frame properties such as "0"); putting those into a
    // `let` declaration would make the generated source a SyntaxError.
    const isident = /^[\p{ID_Start}$_][\p{ID_Continue}$\u200c\u200d]*$/u;
    const hidden = Object.keys(window).filter((name) => !gvars.includes(name) && isident.test(name));
    // An empty list must not produce a bare `let ;`.
    const shadow = hidden.length > 0 ? 'let ' + hidden.join(';let ') + ';' : '';
    return geval('(function(){"use strict";' + shadow + 'return function(' + param.join(',') + '){return (' + expr + ');};})()');
}
// Compile the source text of a function (`func`) and return the resulting
// function object. `gvars` is forwarded to compile_expr() unchanged.
function compile_func(gvars, func) {
    // Parenthesise the source so it is parsed as an expression, build a
    // zero-argument getter for it, and call the getter right away.
    const wrapped = '(' + func + ')';
    const getfunc = compile_expr(gvars, [], wrapped);
    return getfunc();
}
// Three-way comparison using the < and > operators.
// Returns -1 when a < b, 1 when a > b, and 0 otherwise (including the case
// where the two values are not comparable).
function strcmp(a, b) {
    return a < b ? -1 : (a > b ? 1 : 0);
}
// Build an object from parallel arrays: the i-th name in `col` becomes a
// property holding the i-th value of `arr`.
function ziprow(col, arr) {
    return col.reduce((rowobj, name, idx) => {
        rowobj[name] = arr[idx];
        return rowobj;
    }, {});
}
// Project each object in `arr` onto the property names listed in `sel`.
// Returns a new array of new objects; the inputs are not modified.
function selectcol(sel, arr) {
    const pick = (item) => sel.reduce((picked, key) => {
        picked[key] = item[key];
        return picked;
    }, {});
    return arr.map((item) => pick(item));
}
// Shallow equality of two arrays: true when both have the same length and
// every pair of elements is strictly equal (===).
function array_equal(a, b) {
    // Without this check a shorter `a` would compare equal to any longer
    // array that merely starts with the same elements.
    if (a.length !== b.length) {
        return false;
    }
    return a.every((value, idx) => value === b[idx]);
}
// Lexicographic three-way comparison of two arrays.
//
// Elements are compared position by position: numbers by subtraction,
// everything else with the < and > operators. The first position that
// differs decides the result. Returns a negative number, zero or a positive
// number, so it can be used as an Array.prototype.sort comparator; zero is
// returned only when the arrays are element-wise identical.
function array_cmp(a, b) {
    const order = (x, y) => (x < y ? -1 : (x > y ? 1 : 0));
    const n = Math.min(a.length, b.length);
    for (let i = 0; i < n; i++) {
        const x = a[i];
        const y = b[i];
        if (x === y) {
            continue;
        }
        let c = (typeof x === 'number') ? x - y : order(x, y);
        if (!c) {
            // The elements differ but the primary comparison gave 0 or NaN
            // (e.g. null vs. string, 1 vs. '1', number vs. non-numeric
            // string). Fall back to a deterministic order by type name and
            // then by string form, so differing arrays never compare equal.
            c = order(typeof x, typeof y) || order(String(x), String(y));
        }
        if (c) {
            return c;
        }
    }
    // Common prefix is identical: the shorter array sorts first.
    return a.length - b.length;
}
// Format a duration in seconds as "H:MM:SS".
//
// The value is rounded down to whole seconds. Hours are not padded and may
// exceed 24. Values that are not finite numbers (for example a division by
// an fps of 0) and negative values are rendered as "0:00:00" instead of
// producing garbage such as "NaN:aN:aN".
function sec2str(sec) {
    sec = Math.floor(sec);
    if (!Number.isFinite(sec) || sec < 0) {
        sec = 0;
    }
    const s = sec % 60;
    const m = Math.floor(sec / 60) % 60;
    const h = Math.floor(sec / 60 / 60);
    return h.toString() + ':' + m.toString().padStart(2, '0') + ':' + s.toString().padStart(2, '0');
}
60 |
61 | ///////////////////////////////////////////////////////////////////////////////
// Copy of the row currently loaded into the edit box (set by tbledit()),
// or null when no row is being edited.
var curedit = null;

// Persist the edit box text for the row referenced by `curedit`.
// Does nothing unless that row still exists, is not locked, is in state
// 'done', and the edit box text differs from the stored text. Otherwise the
// row is locked and the change is posted to /updateresult.
function savecur() {
    if (!curedit) {
        return;
    }
    const target = id2item.get(curedit[colmap['id']]);
    if (!target || target.locked) {
        return;
    }
    const rowobj = ziprow(ocrresult.col, target.r);
    const typed = app.geteditboxval();
    if (rowobj.state != 'done' || rowobj.ocrtext == typed) {
        return;
    }
    // Keep the local snapshot in sync with what is being sent.
    curedit[colmap['ocrtext']] = typed;
    rowobj.ocrtext = typed;
    tbllock(target);
    app.refreshafter(axios.post('/updateresult', {
        changes: selectcol(['id','ocrtext'], [rowobj]),
        checkpoint: '“手动修改:'+rowobj.ocrtext+'”之前',
        message: '手动修改已保存:'+rowobj.ocrtext,
        compatlog: true,
    }));
}
// Make `item` the row shown in the edit box (pass a falsy value to clear it).
//
// item:   table item to edit, or null/undefined.
// scroll: when truthy, scroll the row into view after loading it.
//
// Pending text of the previously edited row is saved through savecur() before
// the edit box is switched or cleared. Only rows in state 'done' are
// editable; for anything else the edit box is emptied and made read-only.
function tbledit(item, scroll) {
    // Work on a deep copy so later comparisons see the row as it was loaded.
    const nextrow = item ? JSON.parse(JSON.stringify(item.r)) : null;

    let switched = false;
    if (nextrow && (!curedit || curedit[colmap['id']] != nextrow[colmap['id']])) {
        switched = true;
        app.setframepos(nextrow[colmap['frame_start']]);
    }

    const editable = nextrow && nextrow[colmap['state']] == 'done';
    if (editable) {
        if (curedit && switched) {
            savecur();
        }
        app.$refs.editbox.readOnly = item.locked;
        // Reload the box when the row is locked, is a different row, or its
        // content changed since it was loaded.
        const reload = item.locked || switched || (curedit && array_cmp(curedit, nextrow) != 0);
        if (reload) {
            app.seteditboxval(nextrow[colmap['ocrtext']]);
            app.$refs.editbox.focus();
            app.setocrsel(1, [nextrow[colmap['top']], nextrow[colmap['bottom']]]);
            if (scroll) {
                item.ele.scrollIntoView({block: "nearest", inline: "nearest"});
            }
        }
    } else {
        if (curedit) {
            savecur();
        }
        app.seteditboxval('');
        app.$refs.editbox.readOnly = true;
        app.setocrsel(0);
    }

    curedit = nextrow;
    // The edited row is always part of the selection.
    if (item && !tblselect.has(item)) {
        tblsel([item]);
    }
    updatetblstyle();
}
// Re-synchronise the edit box with the current selection.
// No-op when nothing is being edited. Otherwise keeps editing the current row
// if it is still selected, falls back to the first selected row in table
// order, or clears the editor when the selection is empty.
function updatetbledit() {
    if (!curedit) {
        return;
    }
    const current = id2item.get(curedit[colmap['id']]);
    let target;
    if (current && tblselect.has(current)) {
        target = current;
    } else if (tblselect.size > 0) {
        target = tblitem.find((candidate) => tblselect.has(candidate));
    } else {
        target = null;
    }
    tbledit(target);
}
131 |
132 | ///////////////////////////////////////////////////////////////////////////////
133 |
// Column documentation table: one [column name, description] pair per row.
// The descriptions are user-facing text (they are embedded into the script
// template shown in the code editor).
var coldesc = [
    ['id', '字幕唯一内部ID(不可修改)'],
    ['state', '字幕状态:waitocr, error, done, merged'],
    ['frame_start', '字幕起始帧(含)'],
    ['frame_end', '字幕结束帧(含)'],
    ['engine', '使用的OCR引擎名'],
    ['top', '字幕区域Y1(含)'],
    ['bottom', '字幕区域Y2(含)'],
    ['position', '字幕位置(2下半屏, 8上半屏)'],
    ['ocrtext', '字幕文本'],
];
145 |
// Raw JSON text of the most recently applied result set; updateresult()
// compares against it to detect an unchanged payload.
var lastresult = '';
// Parsed result set: `col` is the list of column names, `row` the list of
// rows, each row being an array of values in column order.
var ocrresult = {col:[],row:[]};
// Column name => index into a row array (rebuilt by updateresult()).
var colmap = {};
// id => item. Items are created by updateresult() with the fields
// { ele, sortkey, tblidx, locked, mergedto, r }, where `ele` is the <tr>
// element (or null) and `r` the row array.
var id2item = new Map(); // id => { ele: , sortkey: [], r: [c1, c2...] }

// The result <table> element; looked up by updateview().
var tableele = null;
// Set of items that pass the current filter.
var tblview = new Set();
// The items of tblview as an array, sorted by their sort key.
var tblitem = [];
// Active view options: `filter(item)` decides visibility, `sortkey(item)`
// yields the array the rows are ordered by. Both stay null until
// setviewopt() has been called.
var viewopt = {
    filter: null,
    sortkey: null,
};
// Built-in sort orders as [label, expression] pairs. Each expression is
// compiled by compile_expr() with the result columns as parameters and must
// evaluate to an array; rows are ordered by comparing these arrays with
// array_cmp().
var tblsortfunc = [
    ['上/下半屏','[-position,frame_start,frame_end,top,bottom,engine,id]'],
    ['帧范围','[frame_start,frame_end,-position,top,bottom,engine,id]'],
    ['帧OCR区域','[top,bottom,-position,frame_start,frame_end,engine,id]'],
    ['引擎名称','[engine,-position,frame_start,frame_end,top,bottom,id]'],
];
// Built-in filters as [label, expression] pairs. Each expression is compiled
// the same way and its result decides whether a row is shown.
// Entries whose expression is null are presumably non-selectable separators
// in the filter dropdown -- TODO confirm against the HTML template.
var tblfltfunc = [
    ['全部','true'],
    ['所有待处理',"!(state == 'merged' || (state == 'done' && ocrtext == ''))"],
    ['所有空项',"state == 'done' && ocrtext == ''"],
    ['=====',null],
    ['上半屏','position == 8'],
    ['上半屏待处理',"position == 8 && !(state == 'merged' || (state == 'done' && ocrtext == ''))"],
    ['上半屏空项',"position == 8 && state == 'done' && ocrtext == ''"],
    ['=====',null],
    ['下半屏','position == 2'],
    ['下半屏待处理',"position == 2 && !(state == 'merged' || (state == 'done' && ocrtext == ''))"],
    ['下半屏空项',"position == 2 && state == 'done' && ocrtext == ''"],
    ['=====',null],
    ['多行',"ocrtext.includes('\\n')"],
    ['待清理',"state == 'merged' || (state == 'done' && ocrtext == '')"],
    ['等待OCR',"state == 'waitocr'"],
    ['OCR失败',"state == 'error'"],
    ['无','false'],
];
// Install a new filter and/or sort order and rebuild the table view.
//
// filter:  filter expression source, or null to keep the current filter.
// sortkey: sort-key expression source, or null to keep the current order.
//
// Both expressions are compiled with the result columns as parameters.
// A filter that fails to compile, or throws while being evaluated for a row,
// hides the affected rows and raises app.tblflterr. A sort-key expression
// that fails to compile throws to the caller.
function setviewopt(filter, sortkey) {
    let filterbroken = false;
    if (filter !== null) {
        let fn = null;
        app.tblflterr = false;
        try {
            fn = compile_expr([], ocrresult.col, filter);
        } catch(e) {
            filterbroken = true;
            app.tblflterr = true;
        }
        viewopt.filter = function (item) {
            if (fn === null) {
                // The expression never compiled: report it and hide the row
                // instead of relying on a TypeError from calling null.
                app.tblflterr = true;
                return false;
            }
            try {
                return fn.apply(this, item.r);
            } catch(e) {
                app.tblflterr = true;
                return false;
            }
        };
    }
    if (sortkey !== null) {
        let fn = compile_expr([], ocrresult.col, sortkey);
        viewopt.sortkey = function (item) {
            return fn.apply(this, item.r);
        };
    }
    if (filter !== null || sortkey !== null) {
        updateview();
        if (filterbroken) {
            // A full updateview() clears the error flag; with no rows to
            // evaluate nothing would raise it again, so restore it here.
            app.tblflterr = true;
        }
        if (curedit) {
            // Keep the row being edited visible after the table was rebuilt.
            let item = id2item.get(curedit[colmap['id']]);
            if (item) {
                item.ele.scrollIntoView({block: "nearest", inline: "nearest"});
            }
        }
    }
}
// Return the items of the current view as a new array, ordered by the
// sort-key expression `expr`. When `expr` is omitted a default key of
// position, frame range, OCR area, engine and id is used.
// The table itself is not modified.
function sortedview(expr) {
    const keyexpr = (expr === undefined)
        ? '[position,frame_start,frame_end,top,bottom,engine,id]'
        : expr;
    const keyfn = compile_expr([], ocrresult.col, keyexpr);
    const keyof = function (item) {
        return keyfn.apply(this, item.r);
    };
    const items = Array.from(tblview);
    items.sort((x, y) => array_cmp(keyof(x), keyof(y)));
    return items;
}
227 |
// Currently selected items.
var tblselect = new Set();
// Selection as it was before the current shift-click range was applied.
var tblselect0 = new Set();
// Anchor item for shift-click range selection, or null.
var tbllastsel = null;

// Mark `item` as locked: set its flag and its row's data-lock attribute, and
// make the edit box read-only when the item is the one being edited.
// A falsy `item` is ignored.
function tbllock(item) {
    if (!item) {
        return;
    }
    item.locked = true;
    item.ele.dataset.lock = 'locked';
    const editing = curedit && curedit[colmap['id']] == item.r[colmap['id']];
    if (editing) {
        app.$refs.editbox.readOnly = true;
    }
}
241 |
// Refresh the data-tag and data-lock attributes of every visible row.
//
// Tags (space separated in data-tag):
//   primary - a 'done' row that does not start on the frame right after the
//             previous 'done' row in table order
//   sel     - the row is selected
//   cur     - the row is the one loaded in the edit box
// Attributes are only written when their value actually changes.
function updatetblstyle() {
    let prevdone = null;
    for (const item of tblitem) {
        const tags = [];
        if (item.r[colmap['state']] == 'done') {
            const start = item.r[colmap['frame_start']];
            if (!prevdone || start != prevdone.r[colmap['frame_end']] + 1) {
                tags.push('primary');
            }
            prevdone = item;
        }
        if (tblselect.has(item)) {
            tags.push('sel');
        }
        if (curedit && item.r[colmap['id']] == curedit[colmap['id']]) {
            tags.push('cur');
        }

        const data = item.ele.dataset;
        const tagvalue = tags.join(' ');
        if (data.tag != tagvalue) {
            data.tag = tagvalue;
        }
        const lockvalue = item.locked ? 'locked' : '';
        if (data.lock != lockvalue) {
            data.lock = lockvalue;
        }
    }
}
// Bring all selection state in line with the current view.
// Drops items that are no longer visible from both selection sets and from
// the range anchor, re-syncs the editor and row styling, refreshes the
// selection/table summary texts, toggles the "no result" hint and redraws
// the time bar.
function updateselect() {
    for (const chosen of [tblselect, tblselect0]) {
        for (const item of chosen) {
            if (!tblview.has(item)) {
                chosen.delete(item);
            }
        }
    }
    if (!tblview.has(tbllastsel)) {
        tbllastsel = null;
    }

    updatetbledit();
    updatetblstyle();

    // Selection summary (read the sets only now: the calls above may have
    // replaced the selection).
    const total = id2item.size;
    const nsel = tblselect.size;
    if (nsel == 0) {
        app.selinfo = '';
        app.selinfo2 = '空';
    } else if (nsel == total) {
        app.selinfo = '全部';
        app.selinfo2 = ' (共'+total+'个)';
    } else {
        app.selinfo = nsel.toString();
        app.selinfo2 = ' / ' + total.toString();
    }

    // Table summary.
    const nshown = tblview.size;
    if (total != nshown) {
        app.tblinfo = '筛选后显示了'+nshown+'条';
        app.tblinfo2 = '(共'+total+'条字幕)';
    } else {
        app.tblinfo = '共'+total+'条字幕';
        app.tblinfo2 = '';
    }
    document.getElementById('tblnoresult').style.display = nshown ? 'none' : '';

    app.redrawtimebar();
}
// Click handler for a table row (`this` is the <tr>, whose `myitem` property
// holds the table item).
//   plain click - select only this row
//   Ctrl+click  - toggle this row in the selection
//   Shift+click - select the range between the anchor row and this row, on
//                 top of the selection that existed before the range started
// The clicked row is loaded into the editor if it ends up selected.
function tblclick(e) {
    const clicked = this.myitem;
    e.stopPropagation();
    e.preventDefault();

    const rangeclick = e.shiftKey && tbllastsel;
    if (rangeclick) {
        tblselect = new Set(tblselect0);
        // Walk the rows in table order; `inside` flips at each end of the
        // range, and both end rows are always included.
        let inside = false;
        for (const rowitem of tblitem) {
            const isanchor = rowitem === tbllastsel;
            const isclicked = rowitem === clicked;
            if (isanchor) inside = !inside;
            if (isclicked) inside = !inside;
            if (inside || isanchor || isclicked) {
                tblselect.add(rowitem);
            }
        }
    } else {
        if (!e.ctrlKey) {
            tblselect.clear();
            tblselect.add(clicked);
        } else if (tblselect.has(clicked)) {
            tblselect.delete(clicked);
        } else {
            tblselect.add(clicked);
        }
        // This row becomes the anchor for later shift-clicks.
        tbllastsel = clicked;
        tblselect0 = new Set(tblselect);
    }

    updateselect();
    if (tblselect.has(clicked)) {
        tbledit(clicked);
    }
}
// Replace the selection with the items in the array `list`.
// A single-item list also becomes the shift-click anchor; any other list
// clears the anchor.
function tblsel(list) {
    tbllastsel = (list.length == 1) ? list[0] : null;
    tblselect0 = new Set(list);
    tblselect = new Set(list);
    updateselect();
}

// Clear the selection.
function tblselnone() {
    const nothing = [];
    tblsel(nothing);
}

// Select every row currently shown in the table.
function tblselall() {
    const everything = tblitem;
    tblsel(everything);
}
// Detach the item's row element from the document and forget it.
// Items without an element are left untouched.
function removeelement(item) {
    const row = item.ele;
    if (!row) {
        return;
    }
    row.remove();
    item.ele = null;
    item.tblidx = -1;
}
// Create (if necessary) and refresh the <tr> element of `item`.
//
// item:         table item whose row is rendered.
// lastele:      row element the new row is inserted after, or null to insert
//               it at the top of the table body.
// rebuildtable: when truthy a newly created row is not inserted here; the
//               caller re-attaches all rows afterwards.
function updateelement(item, lastele, rebuildtable) {
    if (item.ele === null) {
        const tr = document.createElement('tr');
        item.ele = tr;
        tr.myitem = item;
        // Five cells: frame range, OCR area, engine, text, preview.
        for (let n = 5; n > 0; n--) {
            tr.appendChild(document.createElement('td'));
        }
        tr.addEventListener('click', tblclick);
        // Suppress the default action of a shift+mousedown on the row.
        tr.addEventListener('mousedown', function (e) {
            if (e.shiftKey) {
                e.preventDefault();
            }
        });
        tr.dataset.sel = '';
        if (!rebuildtable) {
            if (lastele === null) {
                tableele.tBodies[0].prepend(tr);
            } else {
                lastele.after(tr);
            }
        }
    }

    const row = ziprow(ocrresult.col, item.r);
    const data = item.ele.dataset;
    const cells = item.ele.children;

    data.state = row.state;
    data.empty = row.ocrtext == '' ? 'empty' : '';
    data.position = row.position.toString();

    cells[0].title = sec2str(row.frame_start / app.info.fps);
    cells[0].innerText = row.frame_start + '-' + row.frame_end;
    cells[1].innerText = row.top + '-' + row.bottom;
    cells[2].innerText = row.engine;

    // Text column: what is shown depends on the row state.
    let text;
    if (row.state == 'done') {
        // Line breaks are shown as a visible "\n" followed by a real break.
        text = row.ocrtext == '' ? '(空)' : row.ocrtext.replaceAll('\n','\\n\n');
    } else if (row.state == 'waitocr') {
        text = '等待OCR';
    } else if (row.state == 'error') {
        text = 'OCR失败:' + row.ocrtext;
    } else if (row.state == 'merged') {
        text = row.comment;
    } else {
        text = row.state;
    }
    cells[3].textContent = text;

    if (row.file_id) {
        // Preview: a div showing a window into the image file, positioned
        // via negative background offsets.
        const img = document.createElement('div');
        img.classList.add('img');
        const css = img.style;
        css.backgroundImage = 'url(/file?id=' + row.file_id + ')';
        css.height = row.imgdb_h+'px';
        css.width = row.imgdb_w+'px';
        css.backgroundPositionX = '-'+row.imgdb_l+'px';
        css.backgroundPositionY = '-'+row.imgdb_t+'px';
        cells[4].innerHTML = '';
        cells[4].appendChild(img);
    }
}
// Apply the current filter and sort order to the table.
//
// needdelete: Set of items to remove from the view.
// needupdate: Set of items whose rows must be (re)rendered.
// Calling it without arguments performs a full rebuild of every item.
// Both sets may be modified: items rejected by the filter move from
// needupdate to needdelete.
function updateview(needdelete, needupdate) {
    const fullupdate = (needdelete === undefined && needupdate === undefined);
    if (fullupdate) {
        for (const item of tblview) {
            removeelement(item);
        }
        tblview.clear();
        needdelete = new Set();
        needupdate = new Set(id2item.values());
        app.tblflterr = false;
    }

    const prevsize = tblview.size;
    let domchanges = 0; // rows that have to be added to or removed from the DOM

    for (const item of needupdate) {
        if (!viewopt.filter(item)) {
            needdelete.add(item);
            continue;
        }
        tblview.add(item);
        item.sortkey = viewopt.sortkey(item);
        if (!item.ele) {
            domchanges++;
        }
    }
    for (const item of needdelete) {
        tblview.delete(item);
        needupdate.delete(item);
        if (item.ele) {
            domchanges++;
        }
        removeelement(item);
    }

    tblitem = [...tblview].sort((x, y) => array_cmp(x.sortkey, y.sortkey));

    // Rows that already sit in the table must still be in ascending order of
    // their previous index; otherwise the whole body has to be re-attached.
    const placed = tblitem.filter((item) => item.tblidx >= 0);
    let rebuildtable = placed.some((item, i) => i > 0 && item.tblidx < placed[i - 1].tblidx);

    // On Firefox, also rebuild when many rows change relative to the
    // previous table size.
    if (navigator.userAgent.toLowerCase().includes('firefox')) {
        rebuildtable = rebuildtable || domchanges > Math.sqrt(prevsize);
    }

    tableele = document.getElementById('ocrresult');

    let lastele = null;
    for (const [idx, item] of tblitem.entries()) {
        item.tblidx = idx;
        if (needupdate.has(item)) {
            updateelement(item, lastele, rebuildtable);
        }
        lastele = item.ele;
    }

    if (rebuildtable) {
        const body = tableele.tBodies[0];
        body.innerHTML = '';
        for (const item of tblitem) {
            body.appendChild(item.ele);
        }
    }

    updateselect();
}
// Apply a new result set, given as JSON text, to the table.
//
// When the text differs from the last one applied, it is parsed, the column
// map is refreshed, vanished rows are dropped and new or changed rows are
// scheduled for re-rendering. In every case the rows present in the result
// are unlocked.
//
// Returns true when at least one row was added, changed or removed.
function updateresult(jsonstr) {
    const needdelete = new Set();
    const needupdate = new Set();

    if (lastresult === jsonstr) {
        // Same payload as before: nothing to rebuild, only release locks.
        for (const r of ocrresult.row) {
            const known = id2item.get(r[colmap['id']]);
            if (known !== undefined) {
                known.locked = false;
            }
        }
    } else {
        lastresult = jsonstr;
        ocrresult = JSON.parse(jsonstr);

        ocrresult.col.forEach((name, idx) => colmap[name] = idx);
        // The first result needs the view options to be initialised.
        if (viewopt.filter === null || viewopt.sortkey === null) {
            setviewopt(app.tblflt, app.tblsort);
        }

        const idcol = colmap['id'];

        // Drop every known item whose id is absent from the new result.
        const liveids = new Set(ocrresult.row.map((r) => r[idcol]));
        for (const [id, item] of Array.from(id2item)) {
            if (!liveids.has(id)) {
                id2item.delete(id);
                needdelete.add(item);
            }
        }

        for (const r of ocrresult.row) {
            const id = r[idcol];
            let item = id2item.get(id);
            if (item === undefined) {
                item = { ele: null, sortkey: null, tblidx: -1, locked: false, mergedto: null, r: r };
                id2item.set(id, item);
            } else {
                item.locked = false;
                if (array_equal(item.r, r)) {
                    continue; // unchanged row, keep its element as is
                }
                item.mergedto = null;
                item.r = r;
            }
            needupdate.add(item);
        }
    }

    // Evaluate before updateview(), which may modify both sets.
    const changed = needdelete.size > 0 || needupdate.size > 0;

    updateview(needdelete, needupdate);

    return changed;
}
525 |
526 |
527 | ///////////////////////////////////////////////////////////////////////////////
528 |
529 | Vue.component('imgdb', {
530 | props: ['src'],
531 | data() {
532 | return {
533 | imgloaded: false,
534 | };
535 | },
536 | template:
537 | `
538 |
![]()
539 |
`,
540 | watch: {
541 | src: {
542 | deep: true,
543 | handler(newsrc, oldsrc) {
544 | if (newsrc.hasOwnProperty('file_id') && oldsrc.hasOwnProperty('file_id')) {
545 | if (newsrc.file_id === oldsrc.file_id) {
546 | if (this.imgloaded) {
547 | this.$emit('my-load');
548 | return;
549 | }
550 | }
551 | }
552 | this.imgloaded = false;
553 | },
554 | },
555 | },
556 | methods: {
557 | imgload() {
558 | if (!this.imgloaded) {
559 | this.imgloaded = true;
560 | this.$emit('my-load');
561 | }
562 | },
563 | },
564 | })
565 |
566 | var app;
567 | app = new Vue({
568 | el: '#app',
569 | data: {
570 | loaded: 0,
571 | info: {file: '', width: 100, height: 100, thumb_w: 100, thumb_h: 100, fps: 0, nframes: 100},
572 | thumbnail: {col:[],row:[]},
573 |
574 | mousexy: [-1, -1], // frame image
575 | mousexydown: 0,
576 | ocrsel_origin: -1,
577 | ocrsel: [-1, -1],
578 | ocrselsv: [-1, -1],
579 | ocrselmode: 0,
580 | framescale: 1,
581 |
582 | pos: -1,
583 | postitle: '',
584 | mousepos: -1, mouseposdown: 0, // timebar
585 | pixeldata: null,
586 |
587 | editorup: 0,
588 | editboxundoinfo: null,
589 | editboxredoinfo: null,
590 | editorfontsize: 20,
591 | adveditor: 0,
592 | adveditcm: null,
593 |
594 | tblsortfunc: tblsortfunc,
595 | tblsort: tblsortfunc[0][1],
596 | tblfltfunc: tblfltfunc,
597 | tblflt: tblfltfunc[0][1],
598 | tblfltedit: tblfltfunc[0][1],
599 | tblflterr: false,
600 | selinfo: '',
601 | selinfo2: '',
602 | tblinfo: '',
603 | tblinfo2: '',
604 |
605 | logs: {col:[],row:[]},
606 | checkpointonly: 0,
607 | morelog: 0,
608 | waitstatus: 0,
609 | noautoscrollstatus: 0,
610 | pageerror: 0,
611 | statetimer: null,
612 | rerefresh: false,
613 |
614 | ocrconfig: { engine: '', top: -1, bottom: -1 },
615 | engines: new Map(),
616 |
617 | codeeditor: 0,
618 | codeedit: null,
619 | myscript: { lastid: 0, scripts: [] },
620 | scriptuploadtimer: null,
621 | scriptsel: null,
622 | scriptsel2: null,
623 | scripttemplate: `// 自定义脚本
624 | function (items) {
625 | for (let item of items) {
626 | ` + coldesc.map((d) => ' // item.' + d[0] + ' '.repeat(15 - d[0].length) + d[1]).join('\n') +
627 | `
628 | }
629 | //return '处理完毕';
630 | }
631 | `,
632 | defaultscripts: [
633 | { name: '合并相同', locked: true, value: `// 合并相同:合并相邻且文字完全相同的字幕
634 | function (items) {
635 | if (items.length < 2) {
636 | alert('请选择多条字幕');
637 | return '没有合并字幕';
638 | }
639 | let nmerged = 0;
640 | let lastitem = null;
641 | for (let item of items) {
642 | if (lastitem != null) {
643 | if (item.state == 'done' && lastitem.state == 'done') { // 都是已OCR字幕
644 | if (item.frame_start - 1 == lastitem.frame_end) { // 相邻
645 | if (item.ocrtext == lastitem.ocrtext) { // 文字完全相同
646 | lastitem.frame_end = item.frame_end;
647 | item.state = 'merged';
648 | item.comment = '已合并到上一字幕';
649 | nmerged++;
650 | continue;
651 | }
652 | }
653 | }
654 | }
655 | if (item.state != 'merged') {
656 | lastitem = item;
657 | }
658 | }
659 | return '合并了' + nmerged + '条字幕';
660 | }
661 | ` },
662 | { name: '强制合并相邻', locked: true, value: `// 强制合并相邻:合并相邻字幕,不论文字是否相同
663 | function (items) {
664 | if (items.length < 2) {
665 | alert('请选择多条字幕');
666 | return '没有合并字幕';
667 | }
668 | if (confirm('确定要强制合并相邻字幕吗?')) {
669 | let nmerged = 0;
670 | let lastitem = null;
671 | for (let item of items) {
672 | if (lastitem != null) {
673 | if (item.frame_start - 1 == lastitem.frame_end) { // 相邻
674 | lastitem.frame_end = item.frame_end;
675 | item.state = 'merged';
676 | item.comment = '已合并到上一字幕';
677 | nmerged++;
678 | continue;
679 | }
680 | }
681 | if (item.state == 'done') {
682 | lastitem = item;
683 | }
684 | }
685 | return '强制合并了' + nmerged + '条相邻字幕';
686 | } else {
687 | return '没有合并字幕';
688 | }
689 | }
690 | ` },
691 | { name: '强制合并', locked: true, value: `// 强制合并:合并字幕,不论文字是否相同,也不论是否相邻
692 | function (items) {
693 | if (items.length < 2) {
694 | alert('请选择多条字幕');
695 | return '没有合并字幕';
696 | }
697 | if (confirm('确定要强制合并吗?')) {
698 | let nmerged = 0;
699 | let lastitem = null;
700 | for (let item of items) {
701 | if (lastitem != null) {
702 | lastitem.frame_end = item.frame_end;
703 | item.state = 'merged';
704 | item.comment = '已合并到上一字幕';
705 | nmerged++;
706 | continue;
707 | }
708 | if (item.state == 'done') {
709 | lastitem = item;
710 | }
711 | }
712 | return '强制合并了' + nmerged + '条字幕';
713 | } else {
714 | return '没有合并字幕';
715 | }
716 | }
717 | ` },
718 | { name: '清理', locked: true, value: `// 清理:删除空字幕、被合并字幕
719 | function (items) {
720 | if (confirm('确定要删除空字幕、被合并字幕吗?\\n提示:是否已检查“假空项”(有文字但被误识别为无文字)?')) {
721 | let nempty = 0, nmerged = 0;
722 | for (let item of items) {
723 | // 删除空字幕
724 | if (item.state == 'done' && item.ocrtext == '') {
725 | item.state = 'delete';
726 | nempty++;
727 | }
728 | // 删除被合并字幕
729 | if (item.state == 'merged') {
730 | item.state = 'delete';
731 | nmerged++;
732 | }
733 | }
734 | return '清理了' + nempty + '条空字幕,' + nmerged + '条被合并字幕';
735 | }
736 | return '没有清理字幕';
737 | }
738 | ` },
739 | { name: '删除', locked: true, value: `// 删除:删除操作范围内的所有字幕
740 | function (items) {
741 | let n = items.length;
742 | if (confirm('确定要删除'+ n +'条字幕吗?')) {
743 | for (let item of items) {
744 | item.state = 'delete';
745 | }
746 | return '删除了' + n + '条字幕';
747 | }
748 | return '没有删除字幕';
749 | }
750 | ` },
751 | { name: '克隆', locked: true, value: `// 克隆:克隆一份选中的字幕
752 | function (items) {
753 | let n = items.length;
754 | if (confirm('确定要克隆'+ n +'条字幕吗?')) {
755 | for (let i = 0; i < n; i++) {
756 | let newitem = JSON.parse(JSON.stringify(items[i]));
757 | newitem.id = 0;
758 | items.push(newitem);
759 | }
760 | return '克隆了'+ n + '条字幕';
761 | }
762 | return '没有克隆字幕';
763 | }
764 | ` },
765 | ],
766 |
767 | myprompt: null,
768 | },
769 | watch: {
770 | pos(newpos) {
771 | //updateselect();
772 | [this.$refs.prevframe, this.$refs.curframe, this.$refs.nextframe].forEach((ref) => {
773 | ref.style.color = 'gray';
774 | ref.style.textDecoration = 'line-through';
775 | });
776 | this.postitle = sec2str(newpos / this.info.fps);
777 | },
778 | editorfontsize() { this.saveui(); },
779 | editorup() { this.saveui(); },
780 | tblsort(newsort, oldsort) {
781 | if (newsort !== oldsort) {
782 | setviewopt(null, newsort);
783 | }
784 | },
785 | tblflt(newflt, oldflt) {
786 | if (newflt != oldflt) {
787 | this.tblfltedit = newflt;
788 | setviewopt(newflt, null);
789 | }
790 | },
791 | },
792 | async mounted() {
793 | try {
794 | let session = (await axios.post('/session')).data;
795 | axios.defaults.headers.common['X-VIDEO2SUB-SESSION'] = session;
796 | this.info = (await axios.post('/info')).data;
797 | document.title += ':' + this.info.file;
798 | this.ocrconfig = await this.loadconfig('OCR')
799 | this.setocrsel(0);
800 | this.engines = new Map((await axios.post('/allengines')).data);
801 | await this.loadui();
802 | this.myscript = await this.loadconfig('SCRIPT', { lastid: 0, scripts: [] });
803 | this.myscript.scripts = this.defaultscripts.concat(this.myscript.scripts.filter((s)=>!s.locked));
804 | this.scriptsel = this.myscript.scripts.slice(-1)[0];
805 | this.pos = 0;
806 | tbledit(null);
807 | window.onmouseup = (e) => this.appmouseup(e);
808 | window.onmousemove = (e) => this.appmousemove(e);
809 | window.addEventListener("resize", this.appresize);
810 | this.codeedit = CodeMirror(this.$refs.codemirror, {
811 | value: '',
812 | mode: 'javascript',
813 | lineNumbers: true,
814 | tabSize: 2,
815 | });
816 | this.adveditcm = CodeMirror(this.$refs.adveditbox, {
817 | value: '',
818 | mode: 'javascript',
819 | tabSize: 2,
820 | });
821 | this.codeedit.on('change', () => this.savescript());
822 | this.refresh();
823 | } catch (e) {
824 | console.log(e);
825 | this.$refs.loading.innerHTML = '加载时发生错误,请重新启动本程序!!!';
826 | }
827 | },
828 | methods: {
829 | ziprowidx(dbresult, idx) {
830 | return ziprow(dbresult.col, dbresult.row[idx]);
831 | },
832 | ziprowarr(dbresult) {
833 | return dbresult.row.map((r) => ziprow(dbresult.col, r));
834 | },
835 |
836 | exportass() {
837 | if (Array.from(id2item.values()).some((item) => item.r[colmap['state']] != 'done' || item.r[colmap['ocrtext']] == '')) {
838 | if (!confirm('有待清理或待处理条目,确认要继续导出吗?')) {
839 | return;
840 | }
841 | }
842 | this.setwaitstatus();
843 | this.refreshafter(axios.post('/exportass'));
844 | },
845 | exportcsv() {
846 | this.setwaitstatus();
847 | this.refreshafter(axios.post('/exportcsv'));
848 | },
849 | importcsv(asnew) {
850 | let fileele = [this.$refs.csvfile0, this.$refs.csvfile1][asnew];
851 | let fromdata = new FormData();
852 | if (fileele.files.length > 0) {
853 | fromdata.append("csv", fileele.files[0]);
854 | fromdata.append('asnew', asnew);
855 | fromdata.append('checkpoint', asnew ? '执行“添加CSV”之前' : '执行“导入CSV”之前');
856 | this.setwaitstatus();
857 | this.refreshafter(axios.post('/importcsv', fromdata, {
858 | headers: {
859 | 'Content-Type': 'multipart/form-data'
860 | }
861 | }));
862 | fileele.value = '';
863 | }
864 | },
865 |
866 | setframescale(h) { this.framescale = h / this.info.height; this.saveui(); },
867 | getdisph() { return Math.round(this.info.height*this.framescale); },
868 | getdispw() { return Math.round(this.info.width*this.framescale); },
869 |
870 | jumpframe(expr) {
871 | this.showmyprompt('请输入表达式:\n如“100”,“n+10”,“n-10”,“n+fps*10”', expr !== undefined ? expr : this.pos.toString(), (val) => {
872 | if (val !== null) {
873 | let next;
874 | try {
875 | next = Math.round(compile_expr([], ['n','fps'], val)(this.pos, this.info.fps));
876 | if (isNaN(next)) {
877 | throw 'nan';
878 | }
879 | } catch (e) {
880 | alert('求值时发生错误!');
881 | this.$nextTick(function() {
882 | this.jumpframe(val);
883 | });
884 | return;
885 | }
886 | this.setframepos(next, 1);
887 | }
888 | });
889 | },
890 |
891 | y2percent(y) {
892 | return y / this.info.height * 100 + '%';
893 | },
894 | framemouse(e) {
895 | let rect = this.$refs.frameevent.getBoundingClientRect();
896 | return [
897 | Math.max(0, Math.min(this.info.width - 1, Math.round((e.clientX - rect.left) / (rect.right - rect.left) * this.info.width))),
898 | Math.max(0, Math.min(this.info.height - 1, Math.round((e.clientY - rect.top) / (rect.bottom - rect.top) * this.info.height)))
899 | ];
900 | },
901 | framemousemove(e) {
902 | this.mousexy = this.framemouse(e);
903 | },
904 | videomousemove(e) {
905 | if (this.mousexydown) {
906 | let y = this.framemouse(e)[1];
907 | this.ocrsel = [this.ocrsel_origin, y].sort((a, b) => a - b);
908 | //console.log(this.ocrsel);
909 | }
910 | },
911 | framemousedown(e) {
912 | this.mousexydown = 1;
913 | let y = this.framemouse(e)[1];
914 | this.ocrsel_origin = y;
915 | this.ocrsel = [y, y];
916 | },
917 | framemouseup(e) {
918 | if (this.mousexydown) {
919 | this.mousexydown = 0;
920 | this.framemousemove(e);
921 | this.videomousemove(e);
922 | if (confirm(this.ocrselmode?'确定要修改选中的'+tblselect.size+'条字幕的OCR区域吗?':'确定要修改新项目的OCR区域吗?')) {
923 | this.setocrsel(-1, this.ocrsel);
924 | this.saveocrsel();
925 | } else {
926 | this.setocrsel(-1);
927 | }
928 | }
929 | },
930 | inputocrsel() {
931 | this.showmyprompt((this.ocrselmode?'修改选中的'+tblselect.size+'条字幕的OCR区域':'修改新项目的OCR区域') + '\n请输入OCR区域信息,格式为“Y1-Y2”', (this.ocrsel[0]>=0 ? this.ocrsel[0]+'-'+this.ocrsel[1] : ''), (val) => {
932 | if (val !== null) {
933 | let newsel = val.split('-').map((v) => parseInt(v, 10));
934 | if (newsel.length == 2 && newsel[0] >= 0 && newsel[1] >= 0) {
935 | newsel.sort((a, b) => a - b);
936 | if (newsel[1] < this.info.height) {
937 | this.setocrsel(-1, newsel);
938 | this.saveocrsel();
939 | return;
940 | }
941 | }
942 | alert('无效值');
943 | }
944 | })
945 | },
946 | saveocrsel() {
947 | if (this.ocrselmode == 0) {
948 | this.ocrconfig.top = this.ocrsel[0];
949 | this.ocrconfig.bottom = this.ocrsel[1];
950 | this.saveconfig('OCR', this.ocrconfig, 'OCR区域已设定 (Y1='+this.ocrconfig.top+', Y2='+this.ocrconfig.bottom+')');
951 | } else if (this.ocrselmode == 1) {
952 | let selrange = tblitem.filter((item) => tblselect.has(item));
953 | let changes = selrange.map((item) => ziprow(ocrresult.col, item.r));
954 | let top = this.ocrsel[0], bottom = this.ocrsel[1];
955 | changes.forEach((item) => (item.top = top, item.bottom = bottom));
956 | if (curedit) {
957 | curedit[colmap['top']] = top;
958 | curedit[colmap['bottom']] = bottom;
959 | }
960 | app.refreshafter(axios.post('/updateresult', {
961 | changes: selectcol(['id','top','bottom'], changes),
962 | checkpoint: '“修改'+selrange.length+'条字幕的OCR范围”之前',
963 | message: '已修改'+selrange.length+'条字幕的OCR范围',
964 | compatlog: true,
965 | }));
966 | selrange.forEach((item) => tbllock(item));
967 | }
968 | },
969 | setocrsel(mode, newsel) {
970 | if (mode == 1) {
971 | this.ocrsel = [newsel[0], newsel[1]];
972 | this.ocrselmode = 1;
973 | } else if (mode == 0) {
974 | this.ocrsel = [this.ocrconfig.top, this.ocrconfig.bottom];
975 | this.ocrselmode = 0;
976 | } else {
977 | if (newsel) {
978 | this.ocrsel = [newsel[0], newsel[1]];
979 | } else {
980 | this.ocrsel = [this.ocrselsv[0], this.ocrselsv[1]];
981 | }
982 | }
983 | this.ocrselsv = [this.ocrsel[0], this.ocrsel[1]];
984 | },
985 |
986 |
987 | imgload(ref) {
988 | ref.style.color = 'black';
989 | ref.style.textDecoration = '';
990 | },
991 |
992 | seteditboxvalinternal(s) {
993 | this.$refs.editbox.value = '';
994 | this.$refs.editbox.value = s;
995 | },
996 | geteditboxval() {
997 | return this.$refs.editbox.value.replaceAll('\\n', '\n');
998 | },
999 | seteditboxval(s) {
1000 | this.seteditboxvalinternal(s.replaceAll('\n', '\\n'));
1001 | this.$refs.editbox.setSelectionRange(0, 0);
1002 | this.editboxsavehistory(-1, -1);
1003 | },
1004 | editboxselinfo() {
1005 | let v = this.$refs.editbox.value;
1006 | let st = this.$refs.editbox.selectionStart;
1007 | let ed = this.$refs.editbox.selectionEnd;
1008 | return [v, st, ed];
1009 | },
1010 | editboxsavehistory(undo, redo) {
1011 | this.editboxundoinfo = undo > 0 ? this.editboxselinfo() : (undo < 0 ? null : [null,null,null]);
1012 | this.editboxredoinfo = redo > 0 ? this.editboxselinfo() : (redo < 0 ? null : [null,null,null]);
1013 | },
1014 | editboxundo(e) {
1015 | if (this.editboxundoinfo) {
1016 | let [v, st, ed] = this.editboxundoinfo;
1017 | if (v !== null) {
1018 | this.editboxsavehistory(0, 1);
1019 | this.seteditboxvalinternal(v);
1020 | this.$refs.editbox.selectionStart = st;
1021 | this.$refs.editbox.selectionEnd = ed;
1022 | }
1023 | e.preventDefault();
1024 | }
1025 | },
1026 | editboxredo(e) {
1027 | if (this.editboxredoinfo) {
1028 | let [v, st, ed] = this.editboxredoinfo;
1029 | if (v !== null) {
1030 | this.editboxsavehistory(1, 0);
1031 | this.seteditboxvalinternal(v);
1032 | this.$refs.editbox.selectionStart = st;
1033 | this.$refs.editbox.selectionEnd = ed;
1034 | }
1035 | e.preventDefault();
1036 | }
1037 | },
1038 | editboxinput(e) {
1039 | if (!e.isComposing) {
1040 | this.editboxsavehistory(-1, -1);
1041 | }
1042 | },
1043 | editboxcut(e) {
1044 | let [v, st, ed] = this.editboxselinfo();
1045 | if (v.substring(st, ed).length > 0) {
1046 | this.editboxsavehistory(1, 0);
1047 | e.clipboardData.setData('text/plain', v.substring(st, ed).replaceAll('\\n', '\n'));
1048 | this.seteditboxvalinternal(v.substring(0, st) + v.substring(ed, v.length));
1049 | this.$refs.editbox.selectionStart = this.$refs.editbox.selectionEnd = st;
1050 | }
1051 | e.preventDefault();
1052 | },
1053 | editboxcopy(e) {
1054 | let [v, st, ed] = this.editboxselinfo();
1055 | if (v.substring(st, ed).length > 0) {
1056 | e.clipboardData.setData('text/plain', v.substring(st, ed).replaceAll('\\n', '\n'));
1057 | }
1058 | e.preventDefault();
1059 | },
1060 | editboxpaste(e) {
1061 | let s = e.clipboardData.getData('text/plain').replaceAll('\n', '\\n');
1062 | if (s.length > 0) {
1063 | this.editboxsavehistory(1, 0);
1064 | let [v, st, ed] = this.editboxselinfo();
1065 | this.seteditboxvalinternal(v.substring(0, st) + s + v.substring(ed, v.length));
1066 | this.$refs.editbox.selectionStart = this.$refs.editbox.selectionEnd = st + s.length;
1067 | }
1068 | e.preventDefault();
1069 | },
1070 |
1071 | findneighbor(curid) {
1072 | if (curid === undefined) {
1073 | if (!curedit) return [null, null, null];
1074 | curid = curedit[colmap['id']];
1075 | }
1076 | let curr = null;
1077 | let prev = null;
1078 | let next = null;
1079 | let multisel = tblselect.size > 1;
1080 | for (let i = 0; i < tblitem.length; i++) {
1081 | let item = tblitem[i];
1082 | if (multisel && !tblselect.has(item)) {
1083 | continue;
1084 | }
1085 | if (item.r[colmap['id']] == curid) {
1086 | curr = item;
1087 | }
1088 | if (item.r[colmap['state']] == 'done') {
1089 | if (curr === null) {
1090 | prev = item;
1091 | } else if (curr !== item) {
1092 | next = item;
1093 | break;
1094 | }
1095 | }
1096 | }
1097 | return [prev, curr, next];
1098 | },
    // Merge subtitle `curr` into the preceding subtitle `prev`.
    //   replace:    when truthy, the surviving subtitle also takes the text
    //               currently in the edit box
    //   nextcursor: item to start editing afterwards (optional)
    // Nothing happens unless both items exist, `curr` is unlocked and its
    // state is 'done'. If `curr` does not start on the frame right after
    // `prev` ends, the merge is refused with an alert.
    mergeneighbor(prev, curr, replace, nextcursor) {
      if (prev && curr && !curr.locked) {
        let item = ziprow(ocrresult.col, curr.r);
        if (item.state != 'done') {
          return;
        }
        // Follow the mergedto chain to the item that `prev` was ultimately merged into.
        let trueprev = prev;
        while (trueprev.mergedto !== null) {
          trueprev = trueprev.mergedto;
        }
        if (item.frame_start - 1 == prev.r[colmap['frame_end']]) { // adjacent
          // Extend the surviving item to cover the merged one.
          let lastitem = ziprow(ocrresult.col, trueprev.r);
          lastitem.frame_end = item.frame_end;
          if (replace) {
            lastitem.ocrtext = this.geteditboxval();
          }
          item.state = 'merged';
          item.comment = '已合并到上一字幕';
          if (nextcursor) tbledit(nextcursor, 1);
          tbllock(prev);
          tbllock(trueprev);
          tbllock(curr, true);
          curr.mergedto = trueprev;
          updatetbledit();
          // Send frame_end (and ocrtext when replacing) for the survivor, plus the new state/comment of the merged item.
          this.refreshafter(axios.post('/updateresult', {
            changes: selectcol(['id','frame_end'].concat(replace ? ['ocrtext'] : []), [lastitem]).concat(selectcol(['id','state','comment'], [item])),
            checkpoint: '“与上条合并:'+lastitem.ocrtext+'”之前',
            message: '已合并到上一字幕:'+lastitem.ocrtext,
            compatlog: true,
          }));
          //console.log(ziprow(ocrresult.col,trueprev.r), ziprow(ocrresult.col,prev.r), ziprow(ocrresult.col,curr.r));
        } else {
          alert('起始帧与上条字幕结束帧不相邻,拒绝合并');
        }
      }
    },
1135 | mergeprev() {
1136 | let [prev, curr, next] = this.findneighbor();
1137 | this.mergeneighbor(prev, curr, false, next);
1138 | },
1139 | replaceprev() {
1140 | let [prev, curr, next] = this.findneighbor();
1141 | this.mergeneighbor(prev, curr, true, next);
1142 | },
1143 | jumpprev() {
1144 | if (!curedit) {
1145 | tbledit(tblitem.find((item) => tblselect.has(item)), 1);
1146 | return;
1147 | }
1148 | let prev = this.findneighbor()[0];
1149 | if (prev) {
1150 | tbledit(prev, 1);
1151 | } else {
1152 | savecur();
1153 | }
1154 | },
1155 | jumpnext() {
1156 | if (!curedit) {
1157 | tbledit(tblitem.find((item) => tblselect.has(item)), 1);
1158 | return;
1159 | }
1160 | let next = this.findneighbor()[2];
1161 | if (next) {
1162 | tbledit(next, 1);
1163 | } else {
1164 | savecur();
1165 | }
1166 | },
1167 | editorkeydown(e) {
1168 | if (e.keyCode == 90 && e.ctrlKey) {
1169 | if (e.shiftKey) {
1170 | this.editboxredo(e);
1171 | } else {
1172 | this.editboxundo(e);
1173 | }
1174 | return;
1175 | }
1176 | if (e.keyCode == 38) {
1177 | this.jumpprev();
1178 | } else if (e.keyCode == 40 || (e.keyCode == 13 && !e.ctrlKey)) {
1179 | this.jumpnext();
1180 | } else if (e.keyCode == 13 && e.ctrlKey) {
1181 | if (e.shiftKey) {
1182 | this.replaceprev();
1183 | } else {
1184 | this.mergeprev();
1185 | }
1186 | } else if (e.key === "Escape") {
1187 | curedit = null;
1188 | tbledit(null);
1189 | } else {
1190 | return;
1191 | }
1192 | e.preventDefault();
1193 | },
1194 | editorwheel(e) {
1195 | if (e.deltaY > 0) {
1196 | this.jumpnext();
1197 | } else if (e.deltaY < 0) {
1198 | this.jumpprev();
1199 | } else {
1200 | return;
1201 | }
1202 | e.preventDefault();
1203 | },
1204 | jumppos(t) {
1205 | if (curedit) {
1206 | this.pos = [curedit[colmap['frame_start']], curedit[colmap['frame_end']]][t];
1207 | }
1208 | },
1209 |
1210 | advedit() {
1211 | if (curedit) {
1212 | //console.log(ziprow(ocrresult.col, curedit));
1213 | this.adveditor = 1;
1214 | let lines = [];
1215 | for (let pair of coldesc) {
1216 | if (pair[0] == 'id') continue;
1217 | lines.push('')
1218 | lines.push(' // ' + pair[1])
1219 | lines.push(' "' + pair[0] + '": ' + JSON.stringify(curedit[colmap[pair[0]]]) + ',');
1220 | }
1221 | this.$nextTick(function() {
1222 | this.adveditcm.refresh();
1223 | });
1224 | this.adveditcm.getDoc().setValue('{' + lines.join('\n') + '\n}\n');
1225 | this.adveditcm.scrollTo(0, 0);
1226 | }
1227 | },
1228 | adveditok() {
1229 | let changes;
1230 | try {
1231 | changes = compile_expr([], [], this.adveditcm.getDoc().getValue())();
1232 | } catch (e) {
1233 | console.log(e);
1234 | alert('错误: ' + ('lineNumber' in e ? '('+e.lineNumber+'行 '+e.columnNumber+'列)' : '') + '\n' + e);
1235 | return;
1236 | }
1237 |
1238 | let newitem = { id: curedit[colmap['id']] };
1239 | for (let [key, value] of Object.entries(changes)) {
1240 | if (key == 'id') continue;
1241 | newitem[key] = value;
1242 | }
1243 | this.refreshafter(axios.post('/updateresult', {
1244 | changes: [newitem],
1245 | checkpoint: '“高级修改:'+JSON.stringify(changes)+'”之前',
1246 | message: '已保存高级修改:'+JSON.stringify(changes),
1247 | compatlog: true,
1248 | }));
1249 | tbllock(id2item.get(newitem.id));
1250 | this.adveditor = 0;
1251 | },
    // Close the advanced editor without applying anything.
    adveditcancel() {
      this.adveditor = 0;
    },
1255 |
1256 |
1257 | barpos(e) {
1258 | return Math.max(0, Math.min(this.info.nframes - 1,
1259 | Math.floor((e.clientX - this.$refs.barbg.getBoundingClientRect().left) / this.$refs.barbg.clientWidth * this.info.nframes)));
1260 | },
1261 | setframepos(n, scrolltype) {
1262 | this.pos = Math.max(0, Math.min(this.info.nframes - 1, n));
1263 | if (tblview.size > 0 && scrolltype) {
1264 | let tblsorted = sortedview();
1265 | let target = tblsorted.findIndex((item) => item.r[colmap['frame_start']] >= n);
1266 | let item;
1267 | if (target < 0) {
1268 | item = tblsorted.slice(-1)[0];
1269 | } else {
1270 | if (tblsorted[target].r[colmap['frame_start']] > n) {
1271 | target--;
1272 | }
1273 | item = target >= 0 ? tblsorted[target] : tblsorted[0];
1274 | }
1275 |
1276 | if (scrolltype === 2) {
1277 | tbledit(null);
1278 | item.ele.scrollIntoView(true);
1279 | } else {
1280 | item.ele.scrollIntoView({block: "nearest", inline: "nearest"});
1281 | }
1282 | }
1283 | },
1284 | setpos(e) {
1285 | this.setframepos(this.barpos(e), 2);
1286 | },
1287 | setmousepos(e) {
1288 | this.mousepos = this.barpos(e);
1289 | if (this.mouseposdown) this.setpos(e);
1290 | },
1291 | timebarmouseup(e) {
1292 | if (this.mouseposdown) {
1293 | this.mouseposdown = 0;
1294 | let rect = this.$refs.barbg.getBoundingClientRect();
1295 | if (e.clientY < rect.top || e.clientY >= rect.bottom) {
1296 | this.mousepos = -1;
1297 | }
1298 | }
1299 | },
1300 | timebarmousemove(e) {
1301 | if (this.mouseposdown) {
1302 | this.mousepos = this.barpos(e);
1303 | this.setpos(e);
1304 | }
1305 | },
1306 | pos2percent(n, leftmargin) {
1307 | let w = Math.floor(document.body.clientWidth);
1308 | let p = n / this.info.nframes * 100 + '%';
1309 | if (leftmargin) {
1310 | return 'min('+p+','+(w-leftmargin)+'px)';
1311 | } else {
1312 | return p;
1313 | }
1314 | },
    // Redraw the 1-pixel-high time bar canvas: each column is coloured by the
    // highest-priority state of the items covering the frames mapped to it.
    redrawtimebar() {
      // Keep the canvas as wide as the page body and exactly one pixel high.
      let w = Math.floor(document.body.clientWidth);
      if (this.$refs.canvas.width != w) {
        this.$refs.canvas.width = w;
        this.$refs.canvas.style.width = w+'px';
      }
      if (this.$refs.canvas.height != 1) {
        this.$refs.canvas.height = 1;
      }
      let ctx = this.$refs.canvas.getContext('2d');
      // Reuse the ImageData buffer unless the width changed.
      if (this.pixeldata === null || this.pixeldata.width != w) {
        this.pixeldata = ctx.createImageData(w, 1);
      }
      let buffer = this.pixeldata.data;
      buffer.fill(0);

      // Colour per priority code; a higher code wins when items share a column.
      let state_color = [
        null,
        [255,255,255],  // 1: not in view
        [176,94,0],     // 2: waitocr
        [84,168,0],     // 3: done (ocrtext != '')
        [255,0,0],      // 4: error
        [0,123,255],    // 5: selected
      ];
      let f = w / this.info.nframes; // pixels per frame
      // Pass 1: store the winning priority code in the red channel of each pixel.
      id2item.forEach((item) => {
        let p = 0;
        switch (item.r[colmap['state']]) {
          case 'waitocr': p = 2; break;
          case 'done': p = item.r[colmap['ocrtext']] != '' ? 3 : 0; break;
          case 'error': p = 4; break;
        }
        if (p > 0 && !tblview.has(item)) {
          p = 1;
        }
        if (tblselect.has(item)) {
          p = 5;
        }
        if (p > 0) {
          for (let i = item.r[colmap['frame_start']]; i <= item.r[colmap['frame_end']]; i++) {
            let j = Math.floor(i * f);
            buffer[j * 4] = Math.max(buffer[j * 4], p);
          }
        }
      });
      // Pass 2: replace each priority code with its opaque RGBA colour; code 0 stays transparent.
      for (let i = 0; i < w * 4; i += 4) {
        let p = buffer[i];
        if (p) {
          buffer[i] = state_color[p][0];
          buffer[i + 1] = state_color[p][1];
          buffer[i + 2] = state_color[p][2];
          buffer[i + 3] = 255;
        }
      }
      ctx.putImageData(this.pixeldata, 0, 0);
    },
1371 |
1372 | tblfltkeydown(e) {
1373 | if (e.keyCode == 13) {
1374 | e.preventDefault();
1375 | this.tblflt = this.tblfltedit;
1376 | }
1377 | },
1378 |
    // Component wrapper around the file-level tblselall() function.
    tblselall() {
      tblselall();
    },
    // Component wrapper around the file-level tblselnone() function.
    tblselnone() { tblselnone(); },
1383 |
1384 | showcodeedit(show) {
1385 | this.codeeditor = show;
1386 | if (show) {
1387 | if (this.myscript.scripts.every((x) => x.locked)) {
1388 | this.newscript();
1389 | }
1390 | this.changescript();
1391 | this.$nextTick(function() {
1392 | this.codeedit.refresh();
1393 | });
1394 | } else {
1395 | this.savescript(true);
1396 | }
1397 | },
1398 | changescript() {
1399 | if (this.scriptsel != null) {
1400 | this.codeedit.getDoc().setValue(this.scriptsel.value);
1401 | this.scriptsel2 = this.scriptsel;
1402 | this.codeedit.scrollTo(0, 0);
1403 | this.codeedit.setOption("readOnly", this.scriptsel.locked);
1404 | this.uploadscript(true);
1405 | }
1406 | },
1407 | renamescript() {
1408 | if (this.scriptsel.locked) {
1409 | alert('不能修改此脚本!');
1410 | return;
1411 | }
1412 | this.showmyprompt('请输入新的名字', this.scriptsel.name, (newname) => {
1413 | if (newname !== null) {
1414 | this.scriptsel.name = newname;
1415 | }
1416 | });
1417 | },
1418 | clonescript() {
1419 | if (!confirm('确定要克隆一份吗?')) {
1420 | return;
1421 | }
1422 | this.myscript.scripts.push(JSON.parse(JSON.stringify(this.scriptsel)));
1423 | this.scriptsel = this.myscript.scripts.slice(-1)[0];
1424 | this.scriptsel.name += ' (副本)';
1425 | this.scriptsel.locked = false;
1426 | this.changescript();
1427 | },
1428 | delscript() {
1429 | if (this.scriptsel.locked) {
1430 | alert('不能删除此脚本!');
1431 | return;
1432 | }
1433 | if (!confirm('确定要删除此脚本吗?')) {
1434 | return;
1435 | }
1436 | let idx = this.myscript.scripts.indexOf(this.scriptsel);
1437 | this.myscript.scripts = this.myscript.scripts.filter((s) => s !== this.scriptsel);
1438 | this.scriptsel = this.myscript.scripts[idx] ? this.myscript.scripts[idx] : this.myscript.scripts.slice(-1)[0];
1439 | this.changescript();
1440 | },
1441 | newscript() {
1442 | this.myscript.scripts.push({
1443 | name: '我的脚本' + (++this.myscript.lastid),
1444 | locked: false,
1445 | value: this.scripttemplate,
1446 | });
1447 | this.scriptsel = this.myscript.scripts.slice(-1)[0];
1448 | this.changescript();
1449 | },
1450 | savescript(uploadnow) {
1451 | if (this.scriptsel2 === this.scriptsel) {
1452 | if (!this.scriptsel.locked) {
1453 | this.scriptsel.value = this.codeedit.getDoc().getValue();
1454 | }
1455 | }
1456 | if (this.scriptuploadtimer !== null) {
1457 | clearTimeout(this.scriptuploadtimer);
1458 | this.scriptuploadtimer = null;
1459 | }
1460 | if (uploadnow) {
1461 | this.uploadscript();
1462 | } else {
1463 | this.scriptuploadtimer = setTimeout(this.uploadscript, 1000);
1464 | }
1465 | },
1466 | uploadscript() {
1467 | this.scriptuploadtimer = null;
1468 | this.saveconfig('SCRIPT', {
1469 | lastid: this.myscript.lastid,
1470 | scripts: this.myscript.scripts.filter((s)=>!s.locked),
1471 | }, '');
1472 | },
1473 | checklock() {
1474 | return Array.from(tblselect).every((item) => !item.locked);
1475 | },
1476 | checkselect(permissive) {
1477 | if (!this.checklock()) {
1478 | return false;
1479 | }
1480 | if (!tblitem.some((item) => tblselect.has(item))) {
1481 | if (permissive) {
1482 | if (!confirm('没有选择要处理的字幕,继续吗?')) {
1483 | return false;
1484 | }
1485 | } else {
1486 | alert('请先选中要处理的字幕');
1487 | return false;
1488 | }
1489 | }
1490 | return true;
1491 | },
1492 | runscript(s) {
1493 | if (!this.checklock()) {
1494 | return false;
1495 | }
1496 | let f;
1497 | let message;
1498 | try {
1499 | f = compile_func(['alert', 'confirm'], s.value);
1500 | } catch(e) {
1501 | console.log(e);
1502 | alert('语法错误: ' + ('lineNumber' in e ? '('+e.lineNumber+'行 '+e.columnNumber+'列)' : '') + '\n' + e);
1503 | return false;
1504 | }
1505 | let items = sortedview().filter((item) => tblselect.has(item)).map((item) => ziprow(ocrresult.col, item.r));
1506 | let old = JSON.stringify(items, ocrresult.col);
1507 | try {
1508 | message = f(items);
1509 | } catch(e) {
1510 | console.log(e);
1511 | alert('运行时错误:\n' + e);
1512 | return false;
1513 | }
1514 | //console.log(old);
1515 | let changed = JSON.stringify(items, ocrresult.col) != old;
1516 | if (message !== '') this.setwaitstatus();
1517 | this.refreshafter(axios.post('/updateresult', {
1518 | changes: changed ? items : [],
1519 | checkpoint: changed ? '运行脚本“'+s.name+'”之前' : '',
1520 | message: typeof message === 'string' ? message : ('执行脚本“'+s.name+'”成功' + (changed ? '' : ',但没有任何条目被修改')),
1521 | compatlog: s.locked&&changed,
1522 | }));
1523 | if (changed) {
1524 | items.forEach((item) => tbllock(id2item.get(item.id)));
1525 | }
1526 | updatetbledit();
1527 | return true;
1528 | },
1529 |
1530 | startocr() {
1531 | if (ocrresult.row.length == 0) {
1532 | this.setwaitstatus();
1533 | this.refreshafter(axios.post('/startocr', { frame_range: Array.from(Array(this.info.nframes).keys()) }));
1534 | return;
1535 | }
1536 | this.showmyprompt('请输入OCR帧范围,格式为“起-终”', 0+'-'+(this.info.nframes-1), (val) => {
1537 | if (val !== null) {
1538 | let timesel = val.split('-').map((v) => parseInt(v, 10));
1539 | if (timesel.length == 2 && timesel[0] >= 0 && timesel[1] >= 0) {
1540 | timesel.sort((a, b) => a - b);
1541 | if (timesel[0] >= 0 && timesel[1] < this.info.nframes) {
1542 | this.setwaitstatus();
1543 | let frame_range = [];
1544 | for (let i = timesel[0]; i <= timesel[1]; i++) {
1545 | frame_range.push(i);
1546 | }
1547 | this.refreshafter(axios.post('/startocr', { frame_range: frame_range }));
1548 | return;
1549 | }
1550 | }
1551 | alert('无效值');
1552 | }
1553 | });
1554 | },
1555 | continueocr(restarttype) {
1556 | let msg = '确定要对';
1557 | let item_range = Array.from(tblselect).map((item) => item.r[colmap['id']]);
1558 | if (item_range.length == 0) {
1559 | msg += '上次未完成的任务';
1560 | } else {
1561 | msg += '选中的' + item_range.length + '条字幕';
1562 | }
1563 | msg += '执行“';
1564 | msg += ({ '': '继续OCR', 'all': '重新OCR', 'empty': '空项OCR' })[restarttype];
1565 | msg += '”操作吗?';
1566 | if (!confirm(msg)) {
1567 | return;
1568 | }
1569 | this.refreshafter(axios.post('/continueocr', { item_range: item_range, restarttype: restarttype }));
1570 | },
1571 | async stopocr() {
1572 | this.setwaitstatus();
1573 | this.refreshafter(axios.post('/stopocr'));
1574 | },
1575 |
1576 | createcheckpoint() {
1577 | this.showmyprompt('请输入备注(可选):', '手动创建的恢复点', (msg) => {
1578 | if (msg !== null) {
1579 | this.checkpoint(msg);
1580 | }
1581 | });
1582 | },
1583 | async checkpoint(msg) {
1584 | this.setwaitstatus();
1585 | this.refreshafter(axios.post('/checkpoint', { msg: msg }));
1586 | },
1587 | async rollback(checkpoint_id) {
1588 | if (confirm('确定要恢复到 #'+checkpoint_id+' 吗?\n字幕数据将被替换为恢复点中的数据。')) {
1589 | curedit = null;
1590 | tbledit(null);
1591 | tblitem.forEach((item) => tbllock(item));
1592 | this.checkpointonly = 0;
1593 | this.morelog = 0;
1594 | this.setwaitstatus();
1595 | this.refreshafter(axios.post('/rollback', { checkpoint_id: checkpoint_id }));
1596 | }
1597 | },
1598 |
1599 | async saveui() {
1600 | await this.saveconfig('UI', {
1601 | disp_h: this.getdisph(),
1602 | editorfontsize: this.editorfontsize,
1603 | editorup: this.editorup,
1604 | }, '');
1605 | },
1606 | async loadui() {
1607 | let uiconfig = await this.loadconfig('UI')
1608 | this.setframescale(uiconfig.disp_h !== undefined ? uiconfig.disp_h : 360);
1609 | this.editorfontsize = uiconfig.editorfontsize;
1610 | if (this.editorup !== undefined) this.editorup = uiconfig.editorup;
1611 | await this.saveui();
1612 | },
1613 |
    // App-level mouseup: forwarded to both drag handlers, so a drag ends
    // even when the button is released outside the frame or the time bar.
    appmouseup(e) {
      this.framemouseup(e);
      this.timebarmouseup(e);
    },
    // App-level mousemove: forwarded to both drag handlers; each one only
    // acts while its own drag is active.
    appmousemove(e) {
      this.videomousemove(e);
      this.timebarmousemove(e);
    },
    // Resize handler: redraw the time bar, whose canvas width follows the body width.
    appresize(e) {
      this.redrawtimebar();
    },
1625 | scrollstatus() {
1626 | if (!this.noautoscrollstatus) {
1627 | this.$nextTick(function() {
1628 | this.$refs.logs.scrollTop = this.$refs.logs.scrollHeight;
1629 | });
1630 | }
1631 | },
    // Set the wait flag (refresh() clears it after a successful poll) and
    // scroll the log panel to the bottom.
    setwaitstatus() {
      this.waitstatus = 1;
      this.scrollstatus();
    },
1636 |
1637 | async loadconfig(key, default_value) {
1638 | return (await axios.post('/loadconfig', {key:key, default_value:default_value})).data;
1639 | },
1640 | async saveconfig(key, value, msg) {
1641 | if (msg != '') {
1642 | this.setwaitstatus();
1643 | }
1644 | await axios.post('/saveconfig', {key:key, value:value, msg:msg});
1645 | if (msg != '') {
1646 | this.refreshnow();
1647 | }
1648 | },
1649 | refreshafter(p) {
1650 | p.then(() => this.refreshnow());
1651 | this.rerefresh = true;
1652 | },
1653 | refreshnow() {
1654 | if (this.statetimer === null) {
1655 | this.rerefresh = true;
1656 | } else {
1657 | clearTimeout(this.statetimer);
1658 | this.refresh();
1659 | }
1660 | },
    // One polling cycle: fetch thumbnails (until all frames have one), OCR
    // results and logs from the backend, then schedule the next cycle in
    // 1 second. If `rerefresh` was raised while this cycle was running, the
    // cycle is repeated immediately instead.
    async refresh() {
      this.rerefresh = false;
      this.statetimer = null; // null means "a refresh is running"; refreshnow() relies on this
      try {
        // Thumbnails are only re-fetched while fewer rows than frames are known.
        if (this.thumbnail.row.length < this.info.nframes) {
          this.thumbnail = Object.freeze((await axios.post('/thumbnail')).data);
          //console.log(this.thumbnail.row.length, this.info.nframes);
        }

      } catch (err) {
        // A thumbnail failure is only logged; the rest of the cycle still runs.
        console.log(err);
      }
      try {
        // transformResponse: [] turns off axios' response transforms, so updateresult gets the raw data.
        let newresultdata = (await axios.post('/loadresult', null, { transformResponse: []})).data;
        // Skip applying the data if another refresh was requested meanwhile.
        if (!this.rerefresh) {
          if (updateresult(newresultdata)) {
            this.scrollstatus();
          }
        }

        // Scroll the log panel only when the last log line changed.
        let lastline = JSON.stringify(this.logs.row.slice(-1));
        this.logs = Object.freeze((await axios.post('/logs')).data);
        if (lastline != JSON.stringify(this.logs.row.slice(-1))) {
          this.scrollstatus();
        }
        this.waitstatus = 0;
      } catch (err) {
        console.log(err);
        this.pageerror = 1;
        this.scrollstatus();
      }
      this.loaded = 1;
      if (this.rerefresh) {
        await this.refresh();
        return;
      }
      this.statetimer = setTimeout(this.refresh, 1000);
    },
1699 |
1700 | showmyprompt(message, value, callback) {
1701 | this.myprompt = {
1702 | message: message,
1703 | value: value,
1704 | callback: callback,
1705 | };
1706 | this.$nextTick(function() {
1707 | this.$refs.promptbox.focus();
1708 | this.$refs.promptbox.select();
1709 | });
1710 | },
1711 | mypromptok() {
1712 | this.myprompt.callback(this.myprompt.value);
1713 | this.myprompt = null;
1714 | },
1715 | mypromptcancel() {
1716 | this.myprompt.callback(null);
1717 | this.myprompt = null;
1718 | },
1719 | mypromptkeydown(e) {
1720 | if (e.keyCode == 13) {
1721 | this.mypromptok();
1722 | } else if (e.key === "Escape") {
1723 | this.mypromptcancel();
1724 | } else {
1725 | return;
1726 | }
1727 | e.preventDefault();
1728 | },
1729 | },
1730 | });
--------------------------------------------------------------------------------
/video2sub.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import ast
3 | import flask
4 | from flask.json import jsonify
5 | import werkzeug
6 | import sys
7 | import sqlite3
8 | import json
9 | import os
10 | import time
11 | import base64
12 | import urllib.parse
13 | import urllib.request
14 | import traceback
15 | import zlib
16 | import secrets
17 | import functools
18 | import csv
19 | import io
20 | import math
21 | from threading import Thread
22 | from cv2 import cv2 # make VSCode happy
23 | import numpy as np
if os.name == 'nt':
    import win32file
    class FileLock:
        """Lock on an existing file, held through a Win32 file handle.

        The lock mode is the share mode the file is opened with, so changing
        the mode closes the handle and reopens the file. The constructor takes
        a shared lock immediately.
        """
        def __init__(self, path):
            self.path = path
            self.handle = None
            self.shared()
        def exclusive(self):
            """Reopen the file with share mode 0 (no sharing with other openers)."""
            self.close()
            self.handle = win32file.CreateFile(self.path, win32file.GENERIC_READ, 0, None, win32file.OPEN_EXISTING, 0, None)
        def shared(self):
            """Reopen the file with FILE_SHARE_READ (other readers allowed)."""
            self.close()
            self.handle = win32file.CreateFile(self.path, win32file.GENERIC_READ, win32file.FILE_SHARE_READ, None, win32file.OPEN_EXISTING, 0, None)
        def close(self):
            """Release the handle; safe to call more than once."""
            if self.handle is not None:
                self.handle.Close()
                self.handle = None
else:
    import fcntl
    class FileLock:
        """Lock on an existing file, held with flock() on a read-only descriptor.

        Unlike the Windows variant, the constructor only opens the file; no
        lock is held until exclusive() or shared() is called. Both use
        LOCK_NB, so they raise instead of waiting when the lock is unavailable.
        """
        def __init__(self, path):
            self.fd = os.open(path, os.O_RDONLY)
        def exclusive(self):
            """Take (or convert to) an exclusive lock without blocking."""
            fcntl.flock(self.fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        def shared(self):
            """Take (or convert to) a shared lock without blocking."""
            fcntl.flock(self.fd, fcntl.LOCK_SH | fcntl.LOCK_NB)
        def close(self):
            """Close the descriptor, which also drops the lock; safe to call more than once."""
            # Closing a descriptor number twice can hit an unrelated file that
            # reused the number, so remember that this one is gone.
            if self.fd is not None:
                os.close(self.fd)
                self.fd = None
52 |
# Replace characters the console encoding cannot represent instead of raising.
sys.stdout.reconfigure(errors='replace')
sys.stderr.reconfigure(errors='replace')

version = '1.0'
dbver = 1

# Command line: <host> <port> <video file | .v2s database file>
if len(sys.argv) != 4:
    print('请使用launcher启动器')
    print('若要手动启动后端:')
    print('    %s [主机名] [端口名] [视频/数据库文件名]'%os.path.basename(sys.argv[0]))
    sys.exit(1)
host = sys.argv[1]
port = sys.argv[2]
if sys.argv[3].lower().endswith('.v2s'):
    # Given a database: the video file name is stored in it, relative to the
    # directory that holds the database.
    dbfile = sys.argv[3]
    if not os.path.isfile(dbfile):
        print('无法打开数据库文件')
        sys.exit(1)
    # sqlite3's connection context manager only commits or rolls back; it
    # does not close, so the connection is closed explicitly here.
    conn = sqlite3.connect(dbfile)
    try:
        row = conn.cursor().execute("SELECT value FROM config WHERE key = 'file'").fetchone()
    finally:
        conn.close()
    if row is None:
        # No video recorded in this database: report it instead of crashing.
        print('无法打开数据库文件')
        sys.exit(1)
    video = os.path.join(os.path.dirname(dbfile), row[0])
else:
    # Given a video: the database sits next to it with a .v2s extension.
    video = sys.argv[3]
    dbfile = os.path.splitext(video)[0] + '.v2s'
76 |
# Open the video file for locking; a failure is reported as "cannot open file".
try:
    lock = FileLock(video)
except Exception:
    print('无法打开文件:', video)
    sys.exit(1)
# Take the exclusive lock and immediately switch to a shared one. Presumably
# this checks that nobody else is using the file (the error message asks
# whether the file is in use) while still letting others read it afterwards.
try:
    lock.exclusive()
    lock.shared()
except Exception:
    print('无法取得文件锁(文件被占用?)')
    sys.exit(1)
88 |
# Global settings: config.txt holds a Python literal and is read from the
# current working directory. utf_8_sig tolerates a leading BOM. The file is
# closed as soon as it has been read.
with open('config.txt', 'r', encoding='utf_8_sig') as config_file:
    gconfig = ast.literal_eval(config_file.read())

# MIME types keyed by file extension.
mime = {
    '.jpg': 'image/jpeg',
    '.bmp': 'image/bmp',
}
95 |
class VideoReader:
    """Frame reader for the module-level ``video`` file, built on cv2.VideoCapture.

    Read failures are never raised. They are queued in ``self.logs`` as
    ``(message, level)`` tuples and written out later by ``writelog``.
    """
    def __init__(self):
        self.file = video
        self.cap = cv2.VideoCapture(self.file)
        if not self.cap.isOpened():
            raise Exception('无法打开视频:%s'%self.file)
        capture = self.cap
        self.width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.fps = capture.get(cv2.CAP_PROP_FPS)
        self.nframes = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        self.logs = []
    def read(self, frame):
        """Return the image of frame number ``frame``.

        A seek is only issued when the capture is not already positioned on
        ``frame``. If the read fails, the capture is reopened and the read is
        retried once; if that fails too, a black image of the video's size is
        returned. Both outcomes are queued as error log entries.
        """
        capture = self.cap
        if capture.get(cv2.CAP_PROP_POS_FRAMES) != frame:
            capture.set(cv2.CAP_PROP_POS_FRAMES, frame)
        ok, image = capture.read()
        if ok:
            return image
        # Recovery attempt: start over with a fresh capture object.
        capture.release()
        capture = cv2.VideoCapture(self.file)
        self.cap = capture
        where = (frame, self.nframes)
        if capture.isOpened():
            capture.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ok, image = capture.read()
            if ok:
                self.logs.append(('读取视频帧%d/%d时发生错误,已尝试恢复;请考虑先转码一遍视频'%where, 'E'))
                return image
        self.logs.append(('读取视频帧%d/%d时发生错误;请考虑先转码一遍视频'%where, 'E'))
        return np.zeros((self.height, self.width, 3), np.uint8)
    def close(self):
        """Release the underlying capture."""
        self.cap.release()
    def writelog(self, db):
        """Pass queued messages to ``log`` (using ``db``) and clear the queue.

        Returns True if anything was written, False if the queue was empty.
        """
        if not self.logs:
            return False
        for message, level in self.logs:
            log(message, level, db=db)
        self.logs = []
        return True
133 |
134 |
135 |
def connect_db():
    """Open and return a new SQLite connection to the module-level ``dbfile``.

    The timeout is set to 999999 seconds.
    """
    connection = sqlite3.connect(dbfile, timeout=999999)
    return connection
138 |
# Open the video once at import time and publish its basic properties as
# module-level globals.
cap = VideoReader()
width, height = cap.width, cap.height
fps, nframes = cap.fps, cap.nframes
144 |
def fpsfix(fps):
    """Return the frame rate as a ``(numerator, denominator)`` pair.

    When ``gconfig['fix_ntsc_fps']`` is enabled and ``fps`` rounds (to three
    decimals) to 23.976, 29.970 or 59.940, the exact rational N/1001 is
    returned. Otherwise the result is ``(fps, 1)``.
    """
    key = '%.3f'%fps
    if not gconfig['fix_ntsc_fps']:
        return (fps, 1)
    ntsc_rates = {
        '23.976': (24000,1001),
        '29.970': (30000,1001),
        '59.940': (60000,1001),
    }
    return ntsc_rates.get(key, (fps, 1))
156 |
def frame2sec(n, fps):
    """Convert frame number ``n`` to seconds, using the rate returned by ``fpsfix(fps)``."""
    numerator, denominator = fpsfix(fps)
    return n * denominator / numerator
160 |
161 | ##############################################################################
162 |
# Open the project database and create or validate its schema. Every
# statement is idempotent (IF NOT EXISTS / INSERT OR IGNORE), so this runs
# unchanged on a fresh or an existing database.
conn = connect_db()
c = conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS config (key TEXT PRIMARY KEY, value)')

# Creation metadata: written once, kept on later runs.
c.execute("INSERT OR IGNORE INTO config VALUES ('create_date', datetime('now','localtime'))")
c.execute("INSERT OR IGNORE INTO config VALUES ('create_ver', ?)", (version,))

# Schema version check: refuse databases written by a newer program.
c.execute("INSERT OR IGNORE INTO config VALUES ('db_ver', ?)", (dbver,))
file_dbver = c.execute("SELECT value FROM config WHERE key = 'db_ver'").fetchone()[0]
if dbver < file_dbver:
    print("版本过低,请升级")
    sys.exit(1)
if file_dbver < dbver:
    print("正在升级数据库")
    # Placeholder for pre-DDL migration steps; none exist yet, and with
    # dbver == 1 this branch cannot be reached.
    if file_dbver == 1:
        assert False

# Last-open metadata: overwritten on every run.
c.execute("INSERT OR REPLACE INTO config VALUES ('lastopen_date',datetime('now','localtime'))")
c.execute("INSERT OR REPLACE INTO config VALUES ('lastopen_ver', ?)", (version,))

# Only the video's base name is stored.
c.execute("INSERT OR REPLACE INTO config VALUES ('file', ?)", (os.path.basename(video),))

# Seed per-project settings from the global defaults (JSON-encoded) without
# overwriting values already present.
c.executemany('INSERT OR IGNORE INTO config VALUES (?,?)', [(k, json.dumps(v)) for k, v in gconfig['default'].items()])


# Binary files and the image regions (left/top/width/height) cut from them.
c.execute('CREATE TABLE IF NOT EXISTS filedb (file_id INTEGER PRIMARY KEY AUTOINCREMENT, format TEXT, data BLOB)')
c.execute('''
    CREATE TABLE IF NOT EXISTS imgdb (
        imgdb_id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER,
        imgdb_l INT, imgdb_t INT, imgdb_w INT, imgdb_h INT,
        FOREIGN KEY(file_id) REFERENCES filedb(file_id)
    )''')

# One thumbnail image per frame.
c.execute('CREATE TABLE IF NOT EXISTS thumbnail (frame_id INT UNIQUE, imgdb_id INT)')
# Subtitle items: frame range, OCR region, recognised text and state.
c.execute('''
    CREATE TABLE IF NOT EXISTS ocrresult (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        state TEXT,
        date TEXT,
        frame_start INT,
        frame_end INT,
        imgdb_id INT,
        engine TEXT,
        left INT,
        top INT,
        right INT,
        bottom INT,
        position INT,
        ocrtext TEXT,
        comment TEXT
    )''')
c.execute('CREATE INDEX IF NOT EXISTS itemstate ON ocrresult (state)')

c.execute('CREATE TABLE IF NOT EXISTS jobrange (id INTEGER PRIMARY KEY)')

c.execute('CREATE TABLE IF NOT EXISTS logs (date TEXT, level CHAR(1), message TEXT, checkpoint_id INTEGER)')
c.execute('CREATE TABLE IF NOT EXISTS checkpoint (checkpoint_id INTEGER PRIMARY KEY AUTOINCREMENT, dbver INT, date TEXT, message TEXT, data BLOB)')

# Post-DDL half of an upgrade: run migrations (none yet), then record the
# new schema version.
if file_dbver < dbver:
    if file_dbver == 1:
        assert False
    c.execute("INSERT OR REPLACE INTO config VALUES ('db_ver', ?)", (dbver,))
    print("数据库升级完成")

conn.commit()

# Highest log rowid present at startup (0 for an empty table).
logid_start = c.execute('SELECT MAX(rowid) FROM logs').fetchone()[0]
logid_start = logid_start if logid_start is not None else 0
232 |
def getconfig(db, key):
    """Return the value stored for ``key`` in the ``config`` table of ``db``.

    Returns None when the key has no row.
    """
    cursor = db.cursor()
    cursor.execute('SELECT value FROM config WHERE key = ?', (key,))
    row = cursor.fetchone()
    if row is None:
        return None
    return row[0]
def putconfig(db, key, val):
    """Store ``val`` under ``key`` in the ``config`` table, replacing any old row.

    The statement is executed on ``db`` but not committed here.
    """
    cursor = db.cursor()
    cursor.execute('INSERT OR REPLACE INTO config VALUES (?, ?)', (key, val))
238 |
#levels: [E]rror [W]arning [I]nfo [S]uccess [C]heckpoint
def log(str, level='I', checkpoint_id=None, db=None):
    """Record a log message.

    With a ``db`` connection the message is inserted into the ``logs`` table
    (timestamped with the local time, not committed here); without one it is
    printed to the console as ``level message``.

    Args:
        str: message text (the name shadows the builtin but is kept because
            it is part of the existing signature).
        level: one-letter level, see the list above.
        checkpoint_id: id of a related checkpoint, or None.
        db: SQLite connection to write to, or None to print instead.
    """
    # Identity test: None is a singleton and `!=` would defer to the
    # connection object's own comparison.
    if db is not None:
        db.cursor().execute("INSERT INTO logs VALUES (datetime('now','localtime'), ?, ?, ?)", (level, str, checkpoint_id))
    else:
        print(level, str)
246 |
def thread_yield():
    """Sleep for one millisecond to give other threads a chance to run."""
    time.sleep(0.001) # seems to have no effect
249 |
def addfile(fmt, blob, db):
    """Insert an encoded file into ``filedb`` and return its new ``file_id``.

    ``fmt`` is stored in the ``format`` column and ``blob`` in ``data``.
    Nothing is committed here.
    """
    cursor = db.cursor()
    cursor.execute('INSERT INTO filedb (format, data) VALUES (?, ?)', (fmt, blob))
    return cursor.lastrowid
254 |
def addimages(imglist, db, fmt='.jpg', fmtparam=None):
    """Store several images as one encoded file and register each of them.

    The images are stacked vertically, encoded once with ``cv2.imencode`` and
    saved through ``addfile``. One ``imgdb`` row is then inserted per image,
    recording the rectangle it occupies inside the stacked file (left is
    always 0, top is the running sum of the previous heights).

    Returns the list of new ``imgdb_id`` values, in the order of ``imglist``.
    """
    encoded_ok, encoded = cv2.imencode(fmt, cv2.vconcat(imglist), fmtparam)
    assert encoded_ok
    file_id = addfile(fmt, encoded, db)
    cursor = db.cursor()
    imgdb_ids = []
    offset_y = 0
    for image in imglist:
        rows, cols = image.shape[0], image.shape[1]
        cursor.execute(
            'INSERT INTO imgdb (file_id, imgdb_l, imgdb_t, imgdb_w, imgdb_h) VALUES (?,?,?,?,?)',
            (file_id, 0, offset_y, cols, rows))
        imgdb_ids.append(cursor.lastrowid)
        offset_y += rows
    return imgdb_ids
270 |
def db2json(db, sql, *param):
    """Run ``sql`` on ``db`` and return the result set as a JSON response.

    Extra positional arguments are forwarded to ``cursor.execute``. The
    response object has two keys: ``col`` (column names) and ``row`` (all
    fetched rows).
    """
    cursor = db.cursor()
    cursor.execute(sql, *param)
    rows = cursor.fetchall()
    columns = [desc[0] for desc in cursor.description]
    return flask.jsonify({
        'col': columns,
        'row': rows,
    })
279 |
def checkpoint(message, generate_log=True):
    """Snapshot the whole ``ocrresult`` table into the ``checkpoint`` table.

    The rows are serialized as compact JSON and zlib-compressed (level 1).
    When ``generate_log`` is true a 'C' level log entry pointing at the new
    checkpoint is written as well. Uses the module-level cursor and does not
    commit.

    Returns the id of the new checkpoint row.
    """
    snapshot = c.execute('SELECT * FROM ocrresult').fetchall()
    payload = json.dumps(snapshot, ensure_ascii=False, separators=(',', ':')).encode()
    dump = zlib.compress(payload, level=1)
    c.execute("INSERT INTO checkpoint (dbver, date, message, data) VALUES (?, datetime('now','localtime'), ?, ?)", (dbver, message, dump))
    checkpoint_id = c.lastrowid
    if generate_log:
        log('已创建恢复点 #%d %s'%(checkpoint_id, message), 'C', checkpoint_id=checkpoint_id, db=conn)
    size_kb = len(dump)/1024
    print('checkpoint #%d (%s) is %.2fKB (zlib)' % (checkpoint_id, message, size_kb))
    return checkpoint_id
292 |
def rollback(checkpoint_id):
    """Replace the contents of ``ocrresult`` with a stored checkpoint.

    Uses the module-level cursor and does not commit.

    Returns:
        An ``(error, message)`` pair. ``error`` is None on success, otherwise
        a short reason why nothing was restored. ``message`` is the message
        stored with the checkpoint, or None when the checkpoint is missing.
    """
    row = c.execute('SELECT dbver, message, data FROM checkpoint WHERE checkpoint_id = ?', (checkpoint_id,)).fetchone()
    if row is None:
        # Unknown id: report it instead of failing while unpacking None.
        return '恢复点不存在', None
    checkpoint_dbver, msg, dump = row
    if checkpoint_dbver != dbver:
        return '版本不符', msg
    c.execute('DELETE FROM ocrresult')
    rows = json.loads(zlib.decompress(dump).decode())
    if rows:
        # One placeholder per column of the snapshotted rows.
        placeholders = ','.join(['?'] * len(rows[0]))
        c.executemany('INSERT INTO ocrresult VALUES (' + placeholders + ')', rows)
    return None, msg
302 |
303 | ##############################################################################
304 |
# Thumbnail geometry and encoding, from the 'thumbnail' config section.
thumb_h = gconfig['thumbnail']['height']
thumb_w = int(width / height * thumb_h)  # keeps the video's aspect ratio
thumb_npart = gconfig['thumbnail']['npart']  # thumbnails stored per file
thumb_fmt = '.jpg'
thumb_fmtparam = [int(cv2.IMWRITE_JPEG_QUALITY), gconfig['thumbnail']['jpg_quality']]

# Preview images of the OCR'ed strip, from the 'subthumb' config section.
subthumb_w = gconfig['subthumb']['width']
subthumb_fmt = '.jpg'
subthumb_fmtparam = [int(cv2.IMWRITE_JPEG_QUALITY), gconfig['subthumb']['jpg_quality']]

# Encoding used when a whole frame is served by the /frame endpoint.
frame_fmt = '.bmp'
frame_fmtparam = None
317 |
318 |
319 | ##############################################################################
320 |
class ImageVConcat:
    """Stack same-sized images into one tall image and map OCR hits back.

    The images are concatenated vertically with a black spacer of the same
    height between neighbours, so image ``k`` occupies rows
    ``[2*k*h, 2*k*h + h)`` of ``self.img``. ``addresult`` files each detected
    text box under every image row it overlaps, and ``getresult`` collects
    the boxes per image.
    """
    def __init__(self, imglist):
        # All images are taken to share the shape of the first one.
        self.h, self.w, _ = imglist[0].shape
        pad = np.zeros((self.h, self.w, 3), np.uint8)
        self.hdict = {}  # row index inside an image -> list of hits
        self.n = len(imglist)
        cat = []
        cath = 0
        for img in imglist:
            if cat:
                # Spacer rows get no hdict entry, so hits there are ignored.
                cat.append(pad)
                cath += self.h
            cat.append(img)
            for i in range(cath, cath + self.h):
                self.hdict[i] = []
            cath += self.h
        self.img = cv2.vconcat(cat)
        assert self.img.shape[0] == cath

    def addresult(self, left, top, right, bottom, ocrtext, comment):
        """Register one text box (coordinates in the stacked image)."""
        left, top, right, bottom = int(left), int(top), int(right), int(bottom)
        hit = (top, bottom, left, right, ocrtext, comment)
        for i in range(top, bottom):
            if i in self.hdict:
                self.hdict[i].append(hit)

    def getresult(self):
        """Return one ``('done', ocrtext, comment)`` tuple per stacked image.

        Boxes touching an image are de-duplicated and sorted (top, bottom,
        left, right first); their texts are joined with newlines and their
        comments with '#'.
        """
        result = []
        for h0 in range(0, 2*self.h*self.n, 2*self.h):
            hits = set()
            for i in range(h0, h0 + self.h):
                hits.update(self.hdict[i])
            ordered = sorted(hits)  # sort once, reuse for both joins
            ocrtext = '\n'.join(hit[4] for hit in ordered)
            comment = '#'.join(hit[5] for hit in ordered)
            result.append(('done', ocrtext, comment))
        return result
357 |
# Stop flag polled by the OCR engines' run() loops and by run_ocrjob.
ocr_stop = False
359 |
360 |
class BaiduOcr:
    """OCR engine that calls the Baidu text-recognition HTTP API.

    ``arg`` is the service name appended to ``gconfig['baiduocr']['OCR_URL']``.
    Services whose name ends with ``_basic`` are called once per image; all
    others are called once per batch, with the batch stacked into a single
    image by ``ImageVConcat``.
    """
    token = None  # access token shared by all instances, fetched lazily
    lastreq = 0   # time.time() after the latest request, for rate limiting

    def __init__(self, arg):
        self.service = arg
        self.use_batch = not arg.endswith('_basic')
        self.ocr_batch = gconfig['baiduocr']['batch_size'] if self.use_batch else 1
        self.ninvoke = 0  # number of OCR requests issued by this instance

    def fetch_token(self):
        """Fetch and cache the access token unless one is already cached.

        Raises:
            Exception: if the reply lacks a token or the required scope.
        """
        if BaiduOcr.token is not None:
            return
        params = {'grant_type': 'client_credentials',
                  'client_id': gconfig['baiduocr']['API_KEY'],
                  'client_secret': gconfig['baiduocr']['SECRET_KEY']}
        post_data = urllib.parse.urlencode(params).encode('utf-8')
        req = urllib.request.Request(gconfig['baiduocr']['TOKEN_URL'], post_data)
        # Context manager so the HTTP response is always closed.
        with urllib.request.urlopen(req, timeout=5) as f:
            result_str = f.read()

        result = json.loads(result_str.decode())
        if 'access_token' in result and 'scope' in result:
            if 'brain_all_scope' not in result['scope'].split(' '):
                raise Exception('please ensure has check the ability')
            BaiduOcr.token = result['access_token']
        else:
            raise Exception('please overwrite the correct API_KEY and SECRET_KEY')

    def request(self, url, data):
        """POST ``data`` (a str) to ``url`` and return the decoded reply."""
        req = urllib.request.Request(url, data.encode('utf-8'))
        # NOTE(review): no timeout is set here, unlike fetch_token.
        with urllib.request.urlopen(req) as f:
            result_str = f.read()
        return result_str.decode()

    def run(self, imglist):
        """Recognize the images and return one result tuple per image.

        Each result is ``('done', ocrtext, comment)`` or ``('error', errmsg)``.
        In per-image mode the list may be shorter than ``imglist`` when
        ``ocr_stop`` is set while the loop is running.
        """
        mintimediff = 1 / gconfig['baiduocr']['qps_limit']
        results = []
        if self.use_batch:
            vcat = ImageVConcat(imglist)
            imglist = [vcat.img]
        for img in imglist:
            errmsg = None
            try:
                # Compute the remaining wait once: re-reading the clock after
                # the comparison could yield a negative value, which
                # time.sleep rejects with ValueError.
                wait = mintimediff - (time.time() - BaiduOcr.lastreq)
                if wait > 0:
                    time.sleep(wait)
                self.fetch_token()
                url = gconfig['baiduocr']['OCR_URL'] + self.service + "?access_token=" + BaiduOcr.token
                succ, blob = cv2.imencode('.jpg', img, [int(cv2.IMWRITE_JPEG_QUALITY), 99])
                assert succ
                file_content = blob.tobytes()
                self.ninvoke += 1
                result = self.request(url, urllib.parse.urlencode({
                    'image': base64.b64encode(file_content),
                    'language_type': gconfig['baiduocr']['language_type'],
                    'probability': 'true',
                }))
                result_json = json.loads(result)
                if 'error_msg' in result_json:
                    errmsg = result_json['error_msg']
                if "words_result" not in result_json:
                    raise Exception(str(result_json))
                print(str(result_json))

                if self.use_batch:
                    for words_result in result_json["words_result"]:
                        left = words_result['location']['left']
                        top = words_result['location']['top']
                        right = left + words_result['location']['width']
                        bottom = top + words_result['location']['height']
                        vcat.addresult(left, top, right, bottom, words_result["words"], str(words_result))
                    results = vcat.getresult()
                else:
                    results.append(('done', '\n'.join([words_result["words"] for words_result in result_json["words_result"]]), str(result_json)))
            except Exception as e:
                traceback.print_exc()
                # Prefer the API's own error message when it supplied one.
                if errmsg is None:
                    errmsg = str(e)
                if self.use_batch:
                    results = [('error', errmsg)] * vcat.n
                else:
                    results.append(('error', errmsg))
            BaiduOcr.lastreq = time.time()
            if ocr_stop:
                break
        return results

    def done(self):
        """Return a summary line with the number of API calls made."""
        return '调用了%d次百度文字识别API(%s)'%(self.ninvoke,self.service)
453 |
class ChineseOcr:
    """OCR engine that posts each image to a chineseocr HTTP service.

    ``arg`` selects the value sent as ``textLine``: ``'multi'`` sends false
    and ``'single'`` sends true.
    """
    ocr_batch = 5

    def __init__(self, arg):
        for mode, flag in (('multi', b'false'), ('single', b'true')):
            if arg == mode:
                self.textLine = flag

    def run(self, imglist):
        """Recognize the images one by one.

        Returns a list of ``('done', text)`` or ``('error', message)`` tuples.
        It stops early, after the current image, once ``ocr_stop`` is set.
        """
        fmt = '.bmp'
        results = []
        for img in imglist:
            succ, blob = cv2.imencode(fmt, img)
            assert succ
            try:
                payload = b'{"imgString":"data:%s;base64,%s","textAngle":false,"textLine":%s}' % (
                    mime[fmt].encode(), base64.b64encode(blob.tobytes()), self.textLine)
                reply = urllib.request.urlopen(gconfig['chineseocr']['url'], payload, timeout=5)
                rsp = json.load(reply)
                texts = [item['text'] for item in rsp['res']]
                outcome = ('done', '\n'.join(texts))
            except Exception as e:
                traceback.print_exc()
                outcome = ('error', str(e))
            results.append(outcome)
            if ocr_stop:
                break
        return results

    def done(self):
        """This engine has no summary to report."""
        return None
484 |
class DummyOcr:
    """Fake OCR engine driven entirely by ``gconfig['dummyocr']``."""
    ocr_batch = 30

    def run(self, imglist):
        """Return the configured outcome once per input image."""
        settings = gconfig['dummyocr']
        outcome = ('error', None) if settings['always_error'] else ('done', settings['text'])
        return [outcome] * len(imglist)

    def done(self):
        """This engine has no summary to report."""
        return None
494 |
def createocr(engine, h):
    """Create the OCR engine object named by ``engine``.

    Args:
        engine: ``'dummyocr'``, ``'chineseocr:<arg>'`` or ``'baiduocr:<arg>'``;
            the part after the colon is passed to the engine constructor.
        h: accepted for the callers' benefit; not used here.

    Raises:
        AssertionError: if the engine name is not recognized.
    """
    if engine == 'dummyocr':
        return DummyOcr()
    if engine.startswith('chineseocr:'):
        return ChineseOcr(engine[len('chineseocr:'):])
    if engine.startswith('baiduocr:'):
        return BaiduOcr(engine[len('baiduocr:'):])
    # Raised explicitly (same exception type as before) so the check is not
    # stripped when Python runs with -O.
    raise AssertionError("无效OCR引擎名"+engine)
503 |
def _log_engine_summary(ocr, conn):
    """Log the engine's closing summary, if an engine exists and has one."""
    if ocr is None:
        return
    msg = ocr.done()
    if msg:
        log(msg, 'I', db=conn)

def run_ocrjob():
    """Worker: OCR every 'waitocr' item that is listed in ``jobrange``.

    Opens its own video reader and database connection. Items are processed
    in batches that share engine, top and bottom; each batch is cropped from
    the video, sent to the engine, and the results plus preview images are
    written back and committed. A progress line is logged about every five
    seconds. The function returns early when ``ocr_stop`` becomes true.
    """
    global ocr_stop
    cap = VideoReader()
    conn = connect_db()
    c = conn.cursor()

    total = c.execute("SELECT COUNT(id) FROM ocrresult WHERE EXISTS (SELECT * FROM jobrange WHERE ocrresult.id = jobrange.id)").fetchone()[0]
    progress = c.execute("SELECT COUNT(id) FROM ocrresult WHERE EXISTS (SELECT * FROM jobrange WHERE ocrresult.id = jobrange.id) AND state != 'waitocr' AND state != 'error'").fetchone()[0]
    emptycnt = c.execute("SELECT COUNT(id) FROM ocrresult WHERE EXISTS (SELECT * FROM jobrange WHERE ocrresult.id = jobrange.id) AND state != 'waitocr' AND state != 'error' AND ocrtext == ''").fetchone()[0]
    errcnt = c.execute("SELECT COUNT(id) FROM ocrresult WHERE EXISTS (SELECT * FROM jobrange WHERE ocrresult.id = jobrange.id) AND state == 'error'").fetchone()[0]

    period_frames = 0
    period_start = 0

    curengine = ''
    ocr = None

    while True:
        lines = c.execute("SELECT id, engine, top, bottom, frame_start FROM ocrresult WHERE EXISTS (SELECT * FROM jobrange WHERE ocrresult.id = jobrange.id) AND state = 'waitocr' ORDER BY engine, top, frame_start").fetchall()
        if len(lines) == 0:
            break
        # Reverse so that pop() hands out items in query order.
        lines.reverse()
        while True:
            if ocr_stop:
                cap.writelog(conn)
                # ocr is still None when the stop arrives before the first
                # batch; the helper tolerates that.
                _log_engine_summary(ocr, conn)
                log('OCR任务已暂停', 'W', db=conn)
                conn.commit()
                conn.close()
                cap.close()
                return
            # show status
            if len(lines) == 0 or (time.time() - period_start) > 5:
                elapsed = time.time() - period_start
                # Guard against a zero interval on coarse clocks.
                speed = period_frames / elapsed if elapsed > 0 else 0.0
                log('OCR任务进度: %.1f%% [总共%d, 完成%d(空项%d), 错误%d, 剩余%d, fps=%.1f]' % (int((progress+errcnt)/total*1000)/10, total, progress, emptycnt, errcnt, total-progress-errcnt, speed), db=conn)
                conn.commit()
                period_frames = 0
                period_start = time.time()

            if len(lines) == 0:
                break

            # collect batch
            batch = []
            _, engine0, top0, bottom0, _ = lines[-1]
            if engine0 != curengine:
                # Engine change: close out the old engine, create the new one.
                curengine = engine0
                _log_engine_summary(ocr, conn)
                ocr = createocr(engine0, bottom0+1 - top0)
            while len(lines) > 0 and len(batch) < ocr.ocr_batch:
                _, engine, top, bottom, _ = lines[-1]
                if (engine, top, bottom) == (engine0, top0, bottom0):
                    batch.append(lines.pop())
                else:
                    break
            print(batch)

            # process batch
            parts = []
            subthumbs = []
            for _, engine, top, bottom, frame_id in batch:
                img = cap.read(frame_id)
                img = img[top:bottom+1]
                parts.append(img)
                h = int(subthumb_w * (bottom+1 - top) / width)
                img = cv2.resize(img, (subthumb_w, h), interpolation=cv2.INTER_AREA)
                subthumbs.append(img)

            result = ocr.run(parts) # [ (state, ocrtext, comment)
            print('\n'.join(['OCR[%d]: %s'%(batch[i][0], result[i] if i < len(result) else None) for i in range(len(batch))]))
            imgdb_ids = addimages(subthumbs, conn, subthumb_fmt, subthumb_fmtparam)
            for i, r in enumerate(result):
                assert type(r) is tuple
                # Result tuples have 2 or 3 fields; update only those present.
                c.execute('UPDATE ocrresult SET %s, imgdb_id = ? WHERE id = ?' %
                    ', '.join(['state = ?', 'ocrtext = ?', 'comment = ?'][:len(r)]),
                    r + (imgdb_ids[i], batch[i][0]))
                if r[0] == 'error':
                    errcnt += 1
                else:
                    if r[1] == '':
                        emptycnt += 1
                    progress += 1
            period_frames += len(result)
            conn.commit()
            thread_yield()

    cap.writelog(conn)
    # ocr is None when there was nothing to process at all.
    _log_engine_summary(ocr, conn)
    msg = 'OCR任务已完成(空项数%d)'%emptycnt if errcnt == 0 else 'OCR任务已完成(空项数%d),但有%d个错误发生,请使用“继续OCR”功能来重试错误项'%(emptycnt,errcnt)
    log(msg, 'S', db=conn)
    if emptycnt:
        log('OCR结果中有空项,请注意处理', 'W', db=conn)
    conn.commit()
    conn.close()
    cap.close()
606 |
# The most recently started OCR worker thread, or None before the first job.
ocr_thread = None

def startocr():
    """Start a new OCR worker thread.

    Waits for the previous worker (if any) to finish, clears the stop flag
    and launches ``run_ocrjob`` on a fresh thread stored in ``ocr_thread``.
    """
    global ocr_stop, ocr_thread
    if ocr_thread is not None:
        ocr_thread.join()
    ocr_stop = False
    worker = Thread(target=run_ocrjob)
    ocr_thread = worker
    worker.start()
616 |
617 | ##############################################################################
618 |
619 |
620 |
def make_thumbnail():
    """Generate thumbnails for every frame that does not have one yet.

    Uses its own video reader and database connection. Thumbnails are
    stored in groups of ``thumb_npart`` frames per encoded file.
    """
    cap = VideoReader()
    conn = connect_db()
    c = conn.cursor()
    # A changed thumbnail height invalidates all existing thumbnails.
    if getconfig(conn, 'thumb_h') != thumb_h:
        putconfig(conn, 'thumb_h', thumb_h)
        c.execute('DELETE FROM thumbnail')
    # Resume after the highest frame that already has a thumbnail.
    start = c.execute('SELECT MAX(frame_id) + 1 FROM thumbnail').fetchone()[0]
    start = 0 if start is None else start
    if start < nframes - 1:
        for part_start in range(start, nframes, thumb_npart):
            parts = []
            frame_ids = []
            for i in range(part_start, min(part_start + thumb_npart, nframes)):
                img = cap.read(i)
                img = cv2.resize(img, (thumb_w, thumb_h), interpolation=cv2.INTER_AREA)
                parts.append(img)
                frame_ids.append(i)
            imgdb_ids = addimages(parts, conn, thumb_fmt, thumb_fmtparam)
            c.executemany('INSERT INTO thumbnail (frame_id, imgdb_id) VALUES (?, ?)', list(zip(frame_ids, imgdb_ids)))
            # Log only when the integer percentage changes.
            if int(part_start / nframes * 100) != int((part_start - thumb_npart) / nframes * 100):
                log('生成缩略图: %d%%' % int(part_start / nframes * 100), db=conn)
            # NOTE(review): commit cadence (per part vs. per progress line) - confirm.
            conn.commit()
            thread_yield()
    cap.writelog(conn)
    log('生成缩略图已完成', 'I', db=conn)
    conn.commit()
    conn.close()
    cap.close()
650 |
def checkwaitocr(db):
    """Log a warning through ``db`` if any ``ocrresult`` row is still 'waitocr'."""
    cursor = db.cursor()
    cursor.execute("SELECT COUNT(id) FROM ocrresult WHERE state == 'waitocr'")
    pending = cursor.fetchone()[0]
    if pending:
        log('有未完成的OCR任务,可使用“继续OCR”功能继续上次的进度', 'W', db=db)
def do_init():
    """Background start-up work: build thumbnails, then report readiness.

    ``make_thumbnail`` runs on a helper thread that is joined immediately.
    Afterwards a 'backend started' entry is logged, unfinished OCR work is
    reported via ``checkwaitocr``, and the connection is committed and closed.
    """
    conn = connect_db()
    worker = Thread(target=make_thumbnail)
    worker.start()
    worker.join()
    log('后端已启动', 'S', db=conn)
    checkwaitocr(db=conn)
    conn.commit()
    conn.close()
664 |
# Run the start-up work in the background; handlers test
# init_thread.is_alive() to tell whether it has finished.
init_thread = Thread(target=do_init)
init_thread.start()
667 |
668 |
669 |
670 | ##############################################################################
671 |
672 |
# Flask application that serves the UI and the JSON endpoints below.
app = flask.Flask(__name__)
# Current session token; None until /session has been called.
session = None
675 |
def session_header_required(f):
    """Decorator: reject requests that lack the current session token.

    The wrapped view runs only when the ``X-VIDEO2SUB-SESSION`` request
    header is present and equal to the module-level ``session`` value;
    otherwise an empty 403 response is returned.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwds):
        headers = flask.request.headers
        authorized = 'X-VIDEO2SUB-SESSION' in headers and headers['X-VIDEO2SUB-SESSION'] == session
        if not authorized:
            return flask.make_response('', 403)
        return f(*args, **kwds)
    return wrapper
683 |
@app.route('/')
def serve_home():
    """Redirect the site root to the UI page."""
    return flask.redirect('/ui/ui.html')
687 |
# The rule must capture the rest of the path as `filename`: the view takes
# that argument and '/' redirects to '/ui/ui.html'.
@app.route('/ui/<path:filename>')
def serve_ui(filename):
    """Serve a front-end file from the ``ui`` directory."""
    return flask.send_from_directory('ui', filename)
691 |
@app.route('/getpid', methods=['POST'])
def serve_getpid():
    """Return this process's pid as JSON."""
    return flask.jsonify(os.getpid())
695 |
@app.route('/session', methods=['POST'])
def serve_session():
    """Create a new random session token, replacing any old one, and return it."""
    global session
    session = secrets.token_hex()
    return flask.jsonify(session)
701 |
@app.route('/havesession', methods=['POST'])
def serve_havesession():
    """Return JSON true if a session token has already been created."""
    global session
    return flask.jsonify(session is not None)
706 |
@app.route('/info', methods=['POST'])
@session_header_required
def serve_info():
    """Return the video's file name, geometry, frame data and thumbnail size."""
    info = {
        'file': os.path.basename(video),
        'width': width,
        'height': height,
        'fps': fps,
        'nframes': nframes,
        'thumb_w': thumb_w,
        'thumb_h': thumb_h,
    }
    return flask.jsonify(info)
719 |
@app.route('/file')
def serve_img():
    """Serve the stored file whose id is given by the ``id`` query argument.

    Responds with 404 when no such file exists.
    """
    file_id = flask.request.args.get('id', type=int)
    row = c.execute('SELECT format, data FROM filedb WHERE file_id = ?', (file_id,)).fetchone()
    if row is None:
        # Unknown or missing id: a clean 404 instead of a TypeError / 500.
        flask.abort(404)
    fmt, data = row
    return flask.Response(data, mimetype=mime[fmt])
725 |
@app.route('/frame')
def serve_frame():
    """Serve one video frame, encoded as ``frame_fmt``.

    The frame index comes from the ``id`` query argument. Any messages the
    reader queued while decoding are written to the log and committed.
    """
    frame_id = flask.request.args.get('id', type=int)
    frame = cap.read(frame_id)
    wrote_logs = cap.writelog(conn)
    if wrote_logs:
        conn.commit()
    encoded_ok, encoded = cv2.imencode(frame_fmt, frame, frame_fmtparam)
    assert encoded_ok
    return flask.Response(encoded.tobytes(), mimetype=mime[frame_fmt])
735 |
@app.route('/thumbnail', methods=['POST'])
@session_header_required
def serve_thumbnail():
    """Return every thumbnail row joined with its imgdb rectangle, by frame."""
    return db2json(conn, 'SELECT frame_id, imgdb.* FROM thumbnail JOIN imgdb USING (imgdb_id) ORDER BY frame_id')
740 |
@app.route('/logs', methods=['POST'])
@session_header_required
def serve_logs():
    """Return the most recent log entries, oldest first.

    Plain entries and checkpoint entries are limited separately, by
    ``max_log`` and ``max_checkpoint``. ``cursession`` is true for rows
    whose rowid is above ``logid_start``.
    """
    return db2json(conn, '''
        SELECT id, id > ? AS cursession, date, level, message, checkpoint_id FROM (
            SELECT * FROM (SELECT ROWID AS id, * FROM logs WHERE checkpoint_id IS NULL ORDER BY ROWID DESC LIMIT ?)
            UNION ALL
            SELECT * FROM (SELECT ROWID AS id, * FROM logs WHERE checkpoint_id IS NOT NULL ORDER BY ROWID DESC LIMIT ?)
        ) ORDER BY id''', (logid_start, gconfig['max_log'], gconfig['max_checkpoint']))
750 |
@app.route('/state', methods=['POST'])
@session_header_required
def serve_state():
    """Report backend status as JSON.

    Keys: ``loaded`` (start-up thread finished), ``ocractive`` (an OCR
    worker is running), ``lastjob`` (number of 'waitocr' or 'error' items)
    and ``curarea`` (number of items whose top/bottom equal the configured
    OCR area, or 0 when no area is configured).
    """
    loaded = not init_thread.is_alive()
    ocractive = ocr_thread is not None and ocr_thread.is_alive()
    lastjob = c.execute("SELECT COUNT(id) FROM ocrresult WHERE state = 'waitocr' or state = 'error'").fetchone()[0]

    ocrconf = json.loads(getconfig(conn, 'OCR'))
    ocrtop = ocrconf['top']
    ocrbottom = ocrconf['bottom']
    curarea = 0
    if ocrtop >= 0 and ocrbottom >= 0:
        curarea = c.execute('SELECT COUNT(id) FROM ocrresult WHERE top = ? AND bottom = ?', (ocrtop, ocrbottom)).fetchone()[0]

    status = {
        'loaded': loaded,
        'ocractive': ocractive,
        'lastjob': lastjob,
        'curarea': curarea,
    }
    return flask.jsonify(status)
771 |
@app.route('/exportass', methods=['POST'])
@session_header_required
def serve_exportass():
    """Export all finished, non-empty subtitles to an .ass file.

    The file is written next to the video, named from the video's base name
    plus ``export_suffix``. Returns ``'ok'`` on success, or ``''`` when there
    is nothing to export or the file exists and overwriting is disabled.
    """
    row = c.execute("SELECT frame_start, frame_end, ocrtext, top, bottom, position FROM ocrresult WHERE state = 'done' AND ocrtext != '' ORDER BY frame_start, frame_end").fetchall()
    if len(row) == 0:
        log('无字幕数据', 'I', db=conn)
        conn.commit()
        return ''
    outfile = os.path.splitext(video)[0] + gconfig['export_suffix'] + '.ass'
    if os.path.exists(outfile) and not gconfig['export_overwrite']:
        log('输出文件已存在,请先删除:%s'%outfile, 'E', db=conn)
        conn.commit()
        return ''
    def frame2timestr(frame, fps, alignment):
        # alignment = (math function name, frame shift, epsilon, shift in 1/100 s)
        fn, shiftframe, eps, shift100 = alignment
        sec100 = getattr(math, fn)(frame2sec(frame + shiftframe, fps) * 100 + eps) + shift100
        sec100 = sec100 if sec100 >= 0 else 0
        fs = sec100 % 100
        s = sec100 // 100 % 60
        m = sec100 // 100 // 60 % 60
        h = sec100 // 100 // 60 // 60
        return '%d:%02d:%02d.%02d'%(h,m,s,fs)
    # Most common top edge (position 8), bottom edge and strip height
    # (position 2) among the exported items; used for the header margins.
    best_top = c.execute("SELECT top, COUNT(top) cnt FROM ocrresult WHERE state = 'done' AND ocrtext != '' AND position = 8 GROUP BY top ORDER BY cnt DESC LIMIT 1").fetchone()
    best_bottom = c.execute("SELECT bottom, COUNT(bottom) cnt FROM ocrresult WHERE state = 'done' AND ocrtext != '' AND position = 2 GROUP BY bottom ORDER BY cnt DESC LIMIT 1").fetchone()
    best_height = c.execute("SELECT bottom+1-top, COUNT(bottom+1-top) cnt FROM ocrresult WHERE state = 'done' AND ocrtext != '' AND position = 2 GROUP BY bottom+1-top ORDER BY cnt DESC LIMIT 1").fetchone()
    best_top = best_top[0] if best_top is not None else 10
    best_bottom = best_bottom[0] if best_bottom is not None else 10
    best_height = best_height[0] if best_height is not None else c.execute("SELECT bottom+1-top, COUNT(bottom+1-top) cnt FROM ocrresult WHERE state = 'done' AND ocrtext != '' GROUP BY bottom+1-top ORDER BY cnt DESC LIMIT 1").fetchone()[0]
    # `with` closes the file even if formatting or writing raises.
    with open(outfile, 'w', encoding='utf_8_sig', newline='\r\n') as f:
        s = gconfig['ass_format']['header']
        s = s.replace('{{文件名}}', os.path.basename(video))
        s = s.replace('{{视频宽度}}', str(width))
        s = s.replace('{{视频高度}}', str(height))
        s = s.replace('{{字幕高度}}', str(best_height))
        s = s.replace('{{上半屏顶边距}}', str(best_top))
        s = s.replace('{{下半屏底边距}}', str(height-best_bottom-1))
        f.write(s)
        for frame_start, frame_end, ocrtext, top, bottom, position in row:
            s = gconfig['ass_format']['bottom_half' if position == 2 else 'top_half']
            start_time = frame2timestr(frame_start, fps, gconfig['ass_format']['time_alignment']['start'])
            # The end time is taken at the start of the frame after frame_end.
            end_time = frame2timestr(frame_end+1, fps, gconfig['ass_format']['time_alignment']['end'])
            s = s.replace('{{开始时间}}', start_time)
            s = s.replace('{{结束时间}}', end_time)
            s = s.replace('{{字幕文本}}', ocrtext.replace('\n', '\\N'))
            f.write(s)
    log('已导出至:%s'%outfile, 'S', db=conn)
    conn.commit()
    return 'ok'
821 |
@app.route('/exportcsv', methods=['POST'])
@session_header_required
def serve_exportcsv():
    """Export the whole ``ocrresult`` table to a CSV file next to the video.

    NULL values are written as the literal text ``SQLITE_NULL``. Returns
    ``'ok'`` on success, or ``''`` when the file already exists and
    overwriting is disabled.
    """
    outfile = os.path.splitext(video)[0] + gconfig['export_suffix'] + '.csv'
    output = io.StringIO()
    records = c.execute('SELECT * FROM ocrresult ORDER BY frame_start,frame_end,top,bottom,engine,id').fetchall()
    header = [desc[0] for desc in c.description]
    writer = csv.writer(output)
    writer.writerow(header)
    for record in records:
        writer.writerow(['SQLITE_NULL' if value is None else value for value in record])
    target_taken = os.path.exists(outfile) and not gconfig['export_overwrite']
    if target_taken:
        log('输出文件已存在,请先删除:%s'%outfile, 'E', db=conn)
        output.close()
        conn.commit()
        return ''
    encoded = output.getvalue().encode('utf_8_sig')
    with open(outfile, 'wb') as f:
        f.write(encoded)
    log('已导出至:%s'%outfile, 'S', db=conn)
    output.close()
    conn.commit()
    return 'ok'
844 |
@app.route('/importcsv', methods=['POST'])
@session_header_required
def serve_importcsv():
    """Import subtitles from an uploaded CSV file into ``ocrresult``.

    The first CSV row names the columns. Rows with an empty id are skipped;
    rows with id 0 (or all rows when the ``asnew`` form field is non-zero)
    are inserted, the others update the row with that id. A checkpoint is
    created first. Any failure rolls the import back and is logged.
    """
    try:
        with conn:
            csvfile = flask.request.files['csv'].read()
            encoding = 'utf_8_sig' if csvfile.startswith(b'\xEF\xBB\xBF') else None
            r = csv.reader(io.TextIOWrapper(io.BytesIO(csvfile), encoding=encoding))
            col = next(r)
            coltype = dict(c.execute("SELECT name,type FROM pragma_table_info('ocrresult')").fetchall())
            # Header names are spliced into SQL text below, so accept only
            # real column names of the table (prevents SQL injection).
            unknown = [name for name in col if name not in coltype]
            if unknown:
                raise ValueError('unknown CSV column(s): %r' % (unknown,))
            colmap = {name: i for i, name in enumerate(col)}
            datacols = [name for name in col if name != 'id']
            def convertvalue(v, name):
                # 'SQLITE_NULL' is the marker the CSV export writes for NULL.
                if v == 'SQLITE_NULL':
                    return None
                if coltype[name].startswith('INT'):
                    return int(v)
                return v
            ins = []
            upd = []
            asnew = int(flask.request.form['asnew'])
            for row in r:
                rowid = row[colmap['id']]
                if rowid == '':
                    continue
                values = [convertvalue(v, name) for v, name in zip(row, col) if name != 'id']
                if int(rowid) == 0 or asnew:
                    ins.append(tuple(values))
                else:
                    upd.append(tuple(values + [int(rowid)]))

            checkpoint(flask.request.form['checkpoint'])
            c.executemany('UPDATE ocrresult SET ' + ','.join([name + ' = ?' for name in datacols]) + ' WHERE id = ?', upd)
            updcnt = c.rowcount
            c.executemany('INSERT INTO ocrresult (' + ','.join(datacols) + ') VALUES (' + ','.join(['?'] * len(datacols)) + ')', ins)
            inscnt = c.rowcount
            log('导入CSV文件成功,修改了%d条字幕,新增了%d条字幕'%(updcnt,inscnt), 'S', db=conn)
    except Exception:
        # Top-level boundary of the handler: report the failure, do not crash.
        traceback.print_exc()
        log('导入CSV文件失败,请查看控制台中的错误详细信息', 'E', db=conn)
    conn.commit()
    return ''
886 |
@app.route('/checkpoint', methods=['POST'])
@session_header_required
def serve_checkpoint():
    """Create a checkpoint labelled with the ``msg`` field of the JSON body."""
    payload = flask.request.get_json()
    label = payload['msg']
    checkpoint(label)
    conn.commit()
    return ''
895 |
@app.route('/rollback', methods=['POST'])
@session_header_required
def serve_rollback():
    """Restore ``ocrresult`` from the checkpoint named in the JSON body.

    Refused (with an error log entry) while start-up is still running or an
    OCR job is active. The outcome is logged either way.
    """
    refusal = None
    if init_thread.is_alive():
        refusal = '请等待后端启动完成'
    elif ocr_thread is not None and ocr_thread.is_alive():
        refusal = '请先暂停OCR任务'
    if refusal is not None:
        log(refusal, 'E', db=conn)
        conn.commit()
        return ''
    payload = flask.request.get_json()
    checkpoint_id = payload['checkpoint_id']
    err, msg = rollback(checkpoint_id)
    if err is not None:
        log('无法还原到恢复点 #%d (%s)'%(checkpoint_id, err), 'E', db=conn)
    else:
        log('已还原到恢复点 #%d %s'%(checkpoint_id, msg), 'S', db=conn)
        checkwaitocr(db=conn)
    conn.commit()
    return ''
917 |
@app.route('/loadconfig', methods=['POST'])
@session_header_required
def serve_loadconfig():
    """Return the stored (JSON text) value for the requested config key.

    When nothing truthy is stored, the request's ``default_value`` is
    JSON-encoded, inserted under that key, committed and returned instead.
    """
    payload = flask.request.get_json()
    stored = getconfig(conn, payload['key'])
    if stored:
        return stored
    fallback = json.dumps(payload['default_value'])
    c.execute('INSERT INTO config VALUES (?, ?)', (payload['key'], fallback))
    conn.commit()
    return fallback
929 |
@app.route('/saveconfig', methods=['POST'])
@session_header_required
def serve_saveconfig():
    """Store a config value (JSON-encoded) and log a confirmation.

    The log text is the request's ``msg`` if given, otherwise a default
    built from the key. An empty ``msg`` suppresses the log entry.
    """
    payload = flask.request.get_json()
    putconfig(conn, payload['key'], json.dumps(payload['value']))
    if 'msg' in payload:
        msg = payload['msg']
    else:
        msg = '%s设置已保存' % payload['key']
    if len(msg):
        log(msg, 'I', db=conn)
    conn.commit()
    return ''
940 |
@app.route('/allengines', methods=['POST'])
@session_header_required
def serve_allengines():
    """Return the configured ``allengines`` value as JSON."""
    return flask.jsonify(gconfig['allengines'])
945 |
@app.route('/updateresult', methods=['POST'])
@session_header_required
def serve_updateresult():
    """Apply a list of subtitle changes sent by the front end.

    Items with a positive ``id`` update that row (all columns when every
    column is supplied, otherwise only the supplied ones). Other items are
    inserted as new rows. Rows left in state 'delete' are then removed.
    A checkpoint is created first when the request asks for one.

    Returns JSON with ``updcnt``, ``inscnt`` and ``delcnt`` taken from the
    cursor's rowcount after the respective statements.
    """
    payload = flask.request.get_json()
    checkpoint_id = None
    if payload['checkpoint']:
        checkpoint_id = checkpoint(payload['checkpoint'], not payload['compatlog'])
    table_cols = [info[0] for info in c.execute("SELECT name FROM pragma_table_info('ocrresult')").fetchall() if info[0] != 'id']
    full_updates = []
    inserts = []
    for item in payload['changes']:
        targets_existing = 'id' in item and item['id'] > 0
        if not targets_existing:
            inserts.append(tuple([item[name] for name in table_cols]))
            continue
        if all(name in item for name in table_cols):
            full_updates.append(tuple([item[name] for name in table_cols] + [item['id']]))
            continue
        # Partial update: touch only the columns that were sent.
        present = [name for name in table_cols if name in item]
        assignments = ','.join([name + ' = ?' for name in present])
        c.execute('UPDATE ocrresult SET ' + assignments + ' WHERE id = ?', tuple(item[name] for name in present) + (item['id'],))
    c.executemany('UPDATE ocrresult SET ' + ','.join([name + ' = ?' for name in table_cols]) + ' WHERE id = ?', full_updates)
    updcnt = c.rowcount
    c.executemany('INSERT INTO ocrresult (' + ','.join([name for name in table_cols]) + ') VALUES (' + ','.join(['?' for name in table_cols]) + ')', inserts)
    inscnt = c.rowcount
    c.execute("DELETE FROM ocrresult WHERE state = 'delete'")
    delcnt = c.rowcount
    if payload['message']:
        log(payload['message'], 'S', db=conn, checkpoint_id=checkpoint_id if payload['compatlog'] else None)
    conn.commit()
    counts = {
        'updcnt': updcnt,
        'inscnt': inscnt,
        'delcnt': delcnt,
    }
    return flask.jsonify(counts)
979 |
@app.route('/loadresult', methods=['POST'])
@session_header_required
def serve_loadresult():
    """Return every ``ocrresult`` row, left-joined with its imgdb rectangle."""
    return db2json(conn, 'SELECT * FROM ocrresult LEFT JOIN imgdb USING (imgdb_id)')
984 |
@app.route('/startocr', methods=['POST'])
@session_header_required
def serve_startocr():
    """Queue a new OCR job for the configured screen area and start it.

    One 'waitocr' item is created per frame in the request's
    ``frame_range`` (every frame when it is null), the job range is reset
    to these items, and the OCR worker is started. Returns ``'ok'`` when the
    job was started and ``''`` when the request was refused.
    """
    def refuse(message):
        # Common exit path for every precondition failure.
        log(message, 'E', db=conn)
        conn.commit()
        return ''

    if init_thread.is_alive():
        return refuse('请等待后端启动完成')
    ocrconf = json.loads(getconfig(conn, 'OCR'))
    ocrengine = ocrconf['engine']
    ocrtop = ocrconf['top']
    ocrbottom = ocrconf['bottom']
    ocrleft, ocrright = 0, width-1
    # 2 when the area's bottom edge is in the lower half of the frame, else 8.
    position = 2 if ocrbottom >= height // 2 else 8
    if ocrtop < 0 or ocrbottom < 0:
        return refuse('请先指定字幕在屏幕上的范围')
    if ocr_thread is not None and ocr_thread.is_alive():
        return refuse('已有OCR任务运行中,无法启动新任务')
    checkpoint('“新OCR”之前 [%s]'%ocrengine)
    payload = flask.request.get_json()
    frame_range = payload['frame_range']
    if frame_range is None:
        frame_range = list(range(0, nframes))
    c.execute('DELETE FROM jobrange')
    for frame_id in sorted(frame_range):
        c.execute("INSERT INTO ocrresult (date, state, frame_start, frame_end, engine, left, top, right, bottom, position) VALUES (datetime('now','localtime'), ?, ?, ?, ?, ?, ?, ?, ?, ?)", ('waitocr', frame_id, frame_id, ocrengine, ocrleft, ocrtop, ocrright, ocrbottom, position))
        c.execute('INSERT INTO jobrange VALUES (?)', (c.lastrowid,))
    log('OCR任务已提交 [%s]'%ocrengine, db=conn)
    conn.commit()
    startocr()
    return 'ok'
1017 |
@app.route('/stopocr', methods=['POST'])
@session_header_required
def serve_stopocr():
    """Ask the running OCR thread to stop by setting the ``ocr_stop`` flag.

    When no OCR thread is alive the flag is left untouched and only an
    informational log entry is written. Always returns ''.
    """
    global ocr_stop
    job_running = ocr_thread is not None and ocr_thread.is_alive()
    if job_running:
        # Only the flag is raised here; this handler does not wait for the
        # thread to react.
        ocr_stop = True
        notice = '暂停请求已提交'
    else:
        notice = '无运行中的OCR任务'
    log(notice, 'I', db=conn)
    conn.commit()
    return ''
1030 |
@app.route('/continueocr', methods=['POST'])
@session_header_required
def serve_continueocr():
    """Re-queue existing OCR items and start the OCR worker.

    Request JSON:
        item_range: None to add every 'waitocr'/'error' item to jobrange
            (existing jobrange rows are kept); a non-empty list of
            ocrresult ids to replace jobrange with exactly those ids; an
            empty list to leave jobrange as it is.
        restarttype: which items inside jobrange are reset to 'waitocr':
            ''      -- items in state 'waitocr' or 'error',
            'all'   -- items in state 'waitocr', 'error' or 'done',
            'empty' -- items in state 'done' whose ocrtext is ''.

    The selected items are also switched to the engine currently stored in
    the 'OCR' config entry.

    Returns:
        'ok' once items were re-queued and ``startocr()`` was called; ''
        when the request is rejected (backend still initialising, an OCR
        thread already running) or when no item matches.

    Raises:
        ValueError: ``restarttype`` is not one of the values listed above.
            Raised before anything is written to the database.
    """
    # restarttype -> (SQL predicate selecting the rows to re-queue,
    #                 checkpoint label template).
    # These fragments are fixed constants, never request data, so splicing
    # them into the SQL text below is safe.
    restart_modes = {
        '': ("(state = 'waitocr' OR state = 'error')",
             '“继续OCR”之前 [%s]'),
        'all': ("(state = 'waitocr' OR state = 'error' OR state = 'done')",
                '“重新OCR”之前 [%s]'),
        'empty': ("(state = 'done' AND ocrtext = '')",
                  '“空项OCR”之前 [%s]'),
    }
    in_jobrange = 'EXISTS (SELECT * FROM jobrange WHERE ocrresult.id = jobrange.id)'

    if init_thread.is_alive():
        log('请等待后端启动完成', 'E', db=conn)
        conn.commit()
        return ''
    if ocr_thread is not None and ocr_thread.is_alive():
        log('已有OCR任务运行中,无法启动新任务', 'E', db=conn)
        conn.commit()
        return ''

    data = flask.request.get_json()

    # Validate the mode first, with an explicit raise: an `assert` would be
    # stripped under `python -O`, and checking before any write keeps a bad
    # request from leaving a half-updated jobrange in the open transaction.
    restarttype = data['restarttype']
    mode = restart_modes.get(restarttype) if isinstance(restarttype, str) else None
    if mode is None:
        raise ValueError('unknown restarttype: %r' % (restarttype,))
    predicate, checkpoint_label = mode

    item_range = data['item_range']
    if item_range is None:
        c.execute("INSERT OR IGNORE INTO jobrange SELECT id FROM ocrresult WHERE (state = 'waitocr' OR state = 'error')")
    elif len(item_range) > 0:
        c.execute('DELETE FROM jobrange')
        c.executemany('INSERT INTO jobrange VALUES (?)',
                      [(item_id,) for item_id in sorted(item_range)])
    # An empty list deliberately leaves jobrange untouched.

    new_engine = json.loads(getconfig(conn, 'OCR'))['engine']
    selector = predicate + ' AND ' + in_jobrange

    pending = c.execute('SELECT COUNT(id) FROM ocrresult WHERE ' + selector).fetchone()[0]
    if pending == 0:
        log('没有任务要做', db=conn)
        conn.commit()
        return ''

    # Take the checkpoint only once we know something will change.
    checkpoint(checkpoint_label % new_engine)
    c.execute("UPDATE ocrresult SET state = 'waitocr', engine = ? WHERE " + selector,
              (new_engine,))

    log('OCR任务已提交 [%s]' % new_engine, db=conn)
    conn.commit()
    startocr()
    return 'ok'
1087 |
@app.after_request
def add_header(response):
    """Stamp every outgoing response with headers that forbid caching.

    Registered through ``app.after_request``; returns the same response
    object after modifying it.
    """
    cache_directives = (
        ('no_cache', True),
        ('no_store', True),
        ('must_revalidate', True),
        ('max_age', 0),
    )
    for directive, value in cache_directives:
        setattr(response.cache_control, directive, value)
    # A fixed date long in the past, so the response counts as already expired.
    response.expires = werkzeug.http.parse_date('Thu, 19 Nov 1981 08:52:00 GMT')
    response.headers['Pragma'] = 'no-cache'
    # NOTE(review): presumably meant to disable keep-alive -- confirm that the
    # server actually reads this attribute from the response object.
    response.close_connection = True
    return response
1098 |
# Start the Flask server on the configured host and port. threaded=False
# keeps the server from handling each request in a separate thread
# (presumably because the handlers share one connection/cursor pair,
# `conn` and `c` -- confirm).
app.run(host=host, port=port, threaded=False)
1100 |
--------------------------------------------------------------------------------