├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── Makefile ├── MakefileWindows64 ├── README.md ├── alass-cli ├── Cargo.toml ├── examples │ └── generate_statistics_from_database.rs └── src │ ├── errors.rs │ ├── lib.rs │ ├── main.rs │ └── video_decoder │ ├── ffmpeg_binary.rs │ ├── ffmpeg_library.rs │ └── mod.rs ├── alass-core ├── Cargo.toml ├── README.md └── src │ ├── alass.rs │ ├── lib.rs │ ├── rating_type.rs │ ├── segments.rs │ ├── time_types.rs │ └── timespan_ops.rs ├── documentation ├── slides.pdf └── thesis.pdf ├── rustfmt.toml └── statistics-helpers ├── export_subtitle_from_database.py ├── generate_database_from_videolist.py ├── generate_plots_from_statistics.py ├── list-all-subtitles.py ├── plots_from_videolist.sh └── worst_movies.py /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .* 3 | !.gitignore 4 | *.ass 5 | *.srt 6 | *.svg 7 | data 8 | *.rs.bk 9 | videos.list 10 | generated-data 11 | test_data 12 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "aho-corasick" 5 | version = "0.6.10" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "81ce3d38065e618af2d7b77e10c5ad9a069859b4be3c2250f674af3840d9c8a5" 8 | dependencies = [ 9 | "memchr", 10 | ] 11 | 12 | [[package]] 13 | name = "alass-cli" 14 | version = "2.0.0" 15 | dependencies = [ 16 | "alass-core", 17 | "byteorder", 18 | "clap", 19 | "ctrlc", 20 | "encoding_rs", 21 | "failure", 22 | "libc", 23 | "pbr", 24 | "rand", 25 | "rmp-serde", 26 | "serde", 27 | "serde_json", 28 | "subparse", 29 | "threadpool", 30 | "webrtc-vad", 31 | ] 32 | 33 | [[package]] 34 | name = "alass-core" 35 | version = "2.0.0" 36 | dependencies = [ 37 | "rand", 38 | ] 39 | 40 | [[package]] 41 | name = "ansi_term" 42 | version = "0.11.0" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 45 | dependencies = [ 46 | "winapi", 47 | ] 48 | 49 | [[package]] 50 | name = "ascii" 51 | version = "0.7.1" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "3ae7d751998c189c1d4468cf0a39bb2eae052a9c58d50ebb3b9591ee3813ad50" 54 | 55 | [[package]] 56 | name = "atty" 57 | version = "0.2.13" 58 | source = "registry+https://github.com/rust-lang/crates.io-index" 59 | checksum = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" 60 | dependencies = [ 61 | "libc", 62 | "winapi", 63 | ] 64 | 65 | [[package]] 66 | name = "autocfg" 67 | version = "0.1.6" 68 | source = "registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "b671c8fb71b457dd4ae18c4ba1e59aa81793daacc361d82fcd410cef0d491875" 70 | 71 | [[package]] 72 | name = "backtrace" 73 | version = "0.3.38" 74 | source = "registry+https://github.com/rust-lang/crates.io-index" 75 | checksum = "690a62be8920ccf773ee00ef0968649b0e724cda8bd5b12286302b4ae955fdf5" 76 | dependencies = [ 77 | "backtrace-sys", 78 | "cfg-if 0.1.10", 79 | "libc", 80 | 
"rustc-demangle", 81 | ] 82 | 83 | [[package]] 84 | name = "backtrace-sys" 85 | version = "0.1.31" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "82a830b4ef2d1124a711c71d263c5abdc710ef8e907bd508c88be475cebc422b" 88 | dependencies = [ 89 | "cc", 90 | "libc", 91 | ] 92 | 93 | [[package]] 94 | name = "bitflags" 95 | version = "1.2.0" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "8a606a02debe2813760609f57a64a2ffd27d9fdf5b2f133eaca0b248dd92cdd2" 98 | 99 | [[package]] 100 | name = "byteorder" 101 | version = "1.3.2" 102 | source = "registry+https://github.com/rust-lang/crates.io-index" 103 | checksum = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" 104 | 105 | [[package]] 106 | name = "c2-chacha" 107 | version = "0.2.2" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" 110 | dependencies = [ 111 | "lazy_static 1.4.0", 112 | "ppv-lite86", 113 | ] 114 | 115 | [[package]] 116 | name = "cast" 117 | version = "0.2.2" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" 120 | 121 | [[package]] 122 | name = "cc" 123 | version = "1.0.45" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "4fc9a35e1f4290eb9e5fc54ba6cf40671ed2a2514c3eeb2b2a908dda2ea5a1be" 126 | 127 | [[package]] 128 | name = "cfg-if" 129 | version = "0.1.10" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 132 | 133 | [[package]] 134 | name = "cfg-if" 135 | version = "1.0.0" 136 | source = "registry+https://github.com/rust-lang/crates.io-index" 137 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 138 | 139 | [[package]] 140 | name = 
"chardet" 141 | version = "0.2.4" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31" 144 | 145 | [[package]] 146 | name = "clap" 147 | version = "2.33.0" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" 150 | dependencies = [ 151 | "ansi_term", 152 | "atty", 153 | "bitflags", 154 | "strsim", 155 | "textwrap", 156 | "unicode-width", 157 | "vec_map", 158 | ] 159 | 160 | [[package]] 161 | name = "combine" 162 | version = "2.5.2" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "1645a65a99c7c8d345761f4b75a6ffe5be3b3b27a93ee731fccc5050ba6be97c" 165 | dependencies = [ 166 | "ascii", 167 | "byteorder", 168 | ] 169 | 170 | [[package]] 171 | name = "ctrlc" 172 | version = "3.1.3" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "c7dfd2d8b4c82121dfdff120f818e09fc4380b0b7e17a742081a89b94853e87f" 175 | dependencies = [ 176 | "nix", 177 | "winapi", 178 | ] 179 | 180 | [[package]] 181 | name = "either" 182 | version = "1.5.3" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" 185 | 186 | [[package]] 187 | name = "encoding_rs" 188 | version = "0.8.28" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065" 191 | dependencies = [ 192 | "cfg-if 1.0.0", 193 | ] 194 | 195 | [[package]] 196 | name = "enum_primitive" 197 | version = "0.1.1" 198 | source = "registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "be4551092f4d519593039259a9ed8daedf0da12e5109c5280338073eaeb81180" 200 | dependencies = [ 201 | "num-traits 0.1.43", 202 | ] 203 | 204 | [[package]] 205 | name = "error-chain" 206 
| version = "0.10.0" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | checksum = "d9435d864e017c3c6afeac1654189b06cdb491cf2ff73dbf0d73b0f292f42ff8" 209 | dependencies = [ 210 | "backtrace", 211 | ] 212 | 213 | [[package]] 214 | name = "failure" 215 | version = "0.1.8" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "d32e9bd16cc02eae7db7ef620b392808b89f6a5e16bb3497d159c6b92a0f4f86" 218 | dependencies = [ 219 | "backtrace", 220 | "failure_derive", 221 | ] 222 | 223 | [[package]] 224 | name = "failure_derive" 225 | version = "0.1.8" 226 | source = "registry+https://github.com/rust-lang/crates.io-index" 227 | checksum = "aa4da3c766cd7a0db8242e326e9e4e081edd567072893ed320008189715366a4" 228 | dependencies = [ 229 | "proc-macro2", 230 | "quote", 231 | "syn", 232 | "synstructure", 233 | ] 234 | 235 | [[package]] 236 | name = "getrandom" 237 | version = "0.1.12" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" 240 | dependencies = [ 241 | "cfg-if 0.1.10", 242 | "libc", 243 | "wasi", 244 | ] 245 | 246 | [[package]] 247 | name = "image" 248 | version = "0.13.0" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "1c3f4f5ea213ed9899eca760a8a14091d4b82d33e27cf8ced336ff730e9f6da8" 251 | dependencies = [ 252 | "byteorder", 253 | "enum_primitive", 254 | "num-iter", 255 | "num-rational", 256 | "num-traits 0.1.43", 257 | ] 258 | 259 | [[package]] 260 | name = "itertools" 261 | version = "0.8.0" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358" 264 | dependencies = [ 265 | "either", 266 | ] 267 | 268 | [[package]] 269 | name = "itoa" 270 | version = "0.4.4" 271 | source = "registry+https://github.com/rust-lang/crates.io-index" 272 | checksum = 
"501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" 273 | 274 | [[package]] 275 | name = "lazy_static" 276 | version = "0.2.11" 277 | source = "registry+https://github.com/rust-lang/crates.io-index" 278 | checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" 279 | 280 | [[package]] 281 | name = "lazy_static" 282 | version = "1.4.0" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 285 | 286 | [[package]] 287 | name = "libc" 288 | version = "0.2.62" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba" 291 | 292 | [[package]] 293 | name = "log" 294 | version = "0.3.9" 295 | source = "registry+https://github.com/rust-lang/crates.io-index" 296 | checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" 297 | dependencies = [ 298 | "log 0.4.8", 299 | ] 300 | 301 | [[package]] 302 | name = "log" 303 | version = "0.4.8" 304 | source = "registry+https://github.com/rust-lang/crates.io-index" 305 | checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" 306 | dependencies = [ 307 | "cfg-if 0.1.10", 308 | ] 309 | 310 | [[package]] 311 | name = "memchr" 312 | version = "2.2.1" 313 | source = "registry+https://github.com/rust-lang/crates.io-index" 314 | checksum = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" 315 | 316 | [[package]] 317 | name = "nix" 318 | version = "0.14.1" 319 | source = "registry+https://github.com/rust-lang/crates.io-index" 320 | checksum = "6c722bee1037d430d0f8e687bbdbf222f27cc6e4e68d5caf630857bb2b6dbdce" 321 | dependencies = [ 322 | "bitflags", 323 | "cc", 324 | "cfg-if 0.1.10", 325 | "libc", 326 | "void", 327 | ] 328 | 329 | [[package]] 330 | name = "nom" 331 | version = "2.1.0" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 
333 | checksum = "e5d4598834859fedb9a0a69d5b862a970e77982a92f544d547257a4d49469067" 334 | 335 | [[package]] 336 | name = "num-integer" 337 | version = "0.1.41" 338 | source = "registry+https://github.com/rust-lang/crates.io-index" 339 | checksum = "b85e541ef8255f6cf42bbfe4ef361305c6c135d10919ecc26126c4e5ae94bc09" 340 | dependencies = [ 341 | "autocfg", 342 | "num-traits 0.2.8", 343 | ] 344 | 345 | [[package]] 346 | name = "num-iter" 347 | version = "0.1.39" 348 | source = "registry+https://github.com/rust-lang/crates.io-index" 349 | checksum = "76bd5272412d173d6bf9afdf98db8612bbabc9a7a830b7bfc9c188911716132e" 350 | dependencies = [ 351 | "autocfg", 352 | "num-integer", 353 | "num-traits 0.2.8", 354 | ] 355 | 356 | [[package]] 357 | name = "num-rational" 358 | version = "0.1.42" 359 | source = "registry+https://github.com/rust-lang/crates.io-index" 360 | checksum = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e" 361 | dependencies = [ 362 | "num-integer", 363 | "num-traits 0.2.8", 364 | ] 365 | 366 | [[package]] 367 | name = "num-traits" 368 | version = "0.1.43" 369 | source = "registry+https://github.com/rust-lang/crates.io-index" 370 | checksum = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" 371 | dependencies = [ 372 | "num-traits 0.2.8", 373 | ] 374 | 375 | [[package]] 376 | name = "num-traits" 377 | version = "0.2.8" 378 | source = "registry+https://github.com/rust-lang/crates.io-index" 379 | checksum = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" 380 | dependencies = [ 381 | "autocfg", 382 | ] 383 | 384 | [[package]] 385 | name = "num_cpus" 386 | version = "1.10.1" 387 | source = "registry+https://github.com/rust-lang/crates.io-index" 388 | checksum = "bcef43580c035376c0705c42792c294b66974abbfd2789b511784023f71f3273" 389 | dependencies = [ 390 | "libc", 391 | ] 392 | 393 | [[package]] 394 | name = "numtoa" 395 | version = "0.1.0" 396 | source = "registry+https://github.com/rust-lang/crates.io-index" 
397 | checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" 398 | 399 | [[package]] 400 | name = "pbr" 401 | version = "1.0.2" 402 | source = "registry+https://github.com/rust-lang/crates.io-index" 403 | checksum = "4403eb718d70c03ee279e51737782902c68cca01e870a33b6a2f9dfb50b9cd83" 404 | dependencies = [ 405 | "libc", 406 | "termion", 407 | "time", 408 | "winapi", 409 | ] 410 | 411 | [[package]] 412 | name = "ppv-lite86" 413 | version = "0.2.5" 414 | source = "registry+https://github.com/rust-lang/crates.io-index" 415 | checksum = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" 416 | 417 | [[package]] 418 | name = "proc-macro2" 419 | version = "1.0.4" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "afdc77cc74ec70ed262262942ebb7dac3d479e9e5cfa2da1841c0806f6cdabcc" 422 | dependencies = [ 423 | "unicode-xid", 424 | ] 425 | 426 | [[package]] 427 | name = "quote" 428 | version = "1.0.2" 429 | source = "registry+https://github.com/rust-lang/crates.io-index" 430 | checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" 431 | dependencies = [ 432 | "proc-macro2", 433 | ] 434 | 435 | [[package]] 436 | name = "rand" 437 | version = "0.7.2" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "3ae1b169243eaf61759b8475a998f0a385e42042370f3a7dbaf35246eacc8412" 440 | dependencies = [ 441 | "getrandom", 442 | "libc", 443 | "rand_chacha", 444 | "rand_core", 445 | "rand_hc", 446 | ] 447 | 448 | [[package]] 449 | name = "rand_chacha" 450 | version = "0.2.1" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" 453 | dependencies = [ 454 | "c2-chacha", 455 | "rand_core", 456 | ] 457 | 458 | [[package]] 459 | name = "rand_core" 460 | version = "0.5.1" 461 | source = "registry+https://github.com/rust-lang/crates.io-index" 462 | checksum = 
"90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 463 | dependencies = [ 464 | "getrandom", 465 | ] 466 | 467 | [[package]] 468 | name = "rand_hc" 469 | version = "0.2.0" 470 | source = "registry+https://github.com/rust-lang/crates.io-index" 471 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 472 | dependencies = [ 473 | "rand_core", 474 | ] 475 | 476 | [[package]] 477 | name = "redox_syscall" 478 | version = "0.1.56" 479 | source = "registry+https://github.com/rust-lang/crates.io-index" 480 | checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" 481 | 482 | [[package]] 483 | name = "redox_termios" 484 | version = "0.1.1" 485 | source = "registry+https://github.com/rust-lang/crates.io-index" 486 | checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" 487 | dependencies = [ 488 | "redox_syscall", 489 | ] 490 | 491 | [[package]] 492 | name = "regex" 493 | version = "0.2.11" 494 | source = "registry+https://github.com/rust-lang/crates.io-index" 495 | checksum = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" 496 | dependencies = [ 497 | "aho-corasick", 498 | "memchr", 499 | "regex-syntax", 500 | "thread_local", 501 | "utf8-ranges", 502 | ] 503 | 504 | [[package]] 505 | name = "regex-syntax" 506 | version = "0.5.6" 507 | source = "registry+https://github.com/rust-lang/crates.io-index" 508 | checksum = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" 509 | dependencies = [ 510 | "ucd-util", 511 | ] 512 | 513 | [[package]] 514 | name = "rmp" 515 | version = "0.8.8" 516 | source = "registry+https://github.com/rust-lang/crates.io-index" 517 | checksum = "0f594cb7ff8f1c5a7907f6be91f15795c8301e0d5718eb007fb5832723dd716e" 518 | dependencies = [ 519 | "byteorder", 520 | "num-traits 0.2.8", 521 | ] 522 | 523 | [[package]] 524 | name = "rmp-serde" 525 | version = "0.14.0" 526 | source = "registry+https://github.com/rust-lang/crates.io-index" 527 | 
checksum = "4a31c0798045f039ace94e0166f76478b3ba83116ec7c9d4bc934c5b13b8df21" 528 | dependencies = [ 529 | "byteorder", 530 | "rmp", 531 | "serde", 532 | ] 533 | 534 | [[package]] 535 | name = "rustc-demangle" 536 | version = "0.1.16" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" 539 | 540 | [[package]] 541 | name = "ryu" 542 | version = "1.0.0" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" 545 | 546 | [[package]] 547 | name = "safemem" 548 | version = "0.2.0" 549 | source = "registry+https://github.com/rust-lang/crates.io-index" 550 | checksum = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" 551 | 552 | [[package]] 553 | name = "serde" 554 | version = "1.0.101" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "9796c9b7ba2ffe7a9ce53c2287dfc48080f4b2b362fcc245a259b3a7201119dd" 557 | dependencies = [ 558 | "serde_derive", 559 | ] 560 | 561 | [[package]] 562 | name = "serde_derive" 563 | version = "1.0.101" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "4b133a43a1ecd55d4086bd5b4dc6c1751c68b1bfbeba7a5040442022c7e7c02e" 566 | dependencies = [ 567 | "proc-macro2", 568 | "quote", 569 | "syn", 570 | ] 571 | 572 | [[package]] 573 | name = "serde_json" 574 | version = "1.0.40" 575 | source = "registry+https://github.com/rust-lang/crates.io-index" 576 | checksum = "051c49229f282f7c6f3813f8286cc1e3323e8051823fce42c7ea80fe13521704" 577 | dependencies = [ 578 | "itoa", 579 | "ryu", 580 | "serde", 581 | ] 582 | 583 | [[package]] 584 | name = "strsim" 585 | version = "0.8.0" 586 | source = "registry+https://github.com/rust-lang/crates.io-index" 587 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 588 | 589 | [[package]] 590 | name = 
"subparse" 591 | version = "0.7.0" 592 | source = "registry+https://github.com/rust-lang/crates.io-index" 593 | checksum = "a08e0e5404c97213cd361c86370969ca47a7ec3571509710117e707b9f7c29ab" 594 | dependencies = [ 595 | "chardet", 596 | "combine", 597 | "encoding_rs", 598 | "failure", 599 | "itertools", 600 | "vobsub", 601 | ] 602 | 603 | [[package]] 604 | name = "syn" 605 | version = "1.0.5" 606 | source = "registry+https://github.com/rust-lang/crates.io-index" 607 | checksum = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf" 608 | dependencies = [ 609 | "proc-macro2", 610 | "quote", 611 | "unicode-xid", 612 | ] 613 | 614 | [[package]] 615 | name = "synstructure" 616 | version = "0.12.4" 617 | source = "registry+https://github.com/rust-lang/crates.io-index" 618 | checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" 619 | dependencies = [ 620 | "proc-macro2", 621 | "quote", 622 | "syn", 623 | "unicode-xid", 624 | ] 625 | 626 | [[package]] 627 | name = "termion" 628 | version = "1.5.3" 629 | source = "registry+https://github.com/rust-lang/crates.io-index" 630 | checksum = "6a8fb22f7cde82c8220e5aeacb3258ed7ce996142c77cba193f203515e26c330" 631 | dependencies = [ 632 | "libc", 633 | "numtoa", 634 | "redox_syscall", 635 | "redox_termios", 636 | ] 637 | 638 | [[package]] 639 | name = "textwrap" 640 | version = "0.11.0" 641 | source = "registry+https://github.com/rust-lang/crates.io-index" 642 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 643 | dependencies = [ 644 | "unicode-width", 645 | ] 646 | 647 | [[package]] 648 | name = "thread_local" 649 | version = "0.3.6" 650 | source = "registry+https://github.com/rust-lang/crates.io-index" 651 | checksum = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" 652 | dependencies = [ 653 | "lazy_static 1.4.0", 654 | ] 655 | 656 | [[package]] 657 | name = "threadpool" 658 | version = "1.7.1" 659 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "e2f0c90a5f3459330ac8bc0d2f879c693bb7a2f59689c1083fc4ef83834da865" 661 | dependencies = [ 662 | "num_cpus", 663 | ] 664 | 665 | [[package]] 666 | name = "time" 667 | version = "0.1.42" 668 | source = "registry+https://github.com/rust-lang/crates.io-index" 669 | checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" 670 | dependencies = [ 671 | "libc", 672 | "redox_syscall", 673 | "winapi", 674 | ] 675 | 676 | [[package]] 677 | name = "ucd-util" 678 | version = "0.1.5" 679 | source = "registry+https://github.com/rust-lang/crates.io-index" 680 | checksum = "fa9b3b49edd3468c0e6565d85783f51af95212b6fa3986a5500954f00b460874" 681 | 682 | [[package]] 683 | name = "unicode-width" 684 | version = "0.1.6" 685 | source = "registry+https://github.com/rust-lang/crates.io-index" 686 | checksum = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" 687 | 688 | [[package]] 689 | name = "unicode-xid" 690 | version = "0.2.0" 691 | source = "registry+https://github.com/rust-lang/crates.io-index" 692 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 693 | 694 | [[package]] 695 | name = "utf8-ranges" 696 | version = "1.0.4" 697 | source = "registry+https://github.com/rust-lang/crates.io-index" 698 | checksum = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" 699 | 700 | [[package]] 701 | name = "vec_map" 702 | version = "0.8.1" 703 | source = "registry+https://github.com/rust-lang/crates.io-index" 704 | checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" 705 | 706 | [[package]] 707 | name = "vobsub" 708 | version = "0.2.3" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "aa122d660e26d9b6aa8f3436304b667ec81cbc0d48a5d19640d7e55ca8eac812" 711 | dependencies = [ 712 | "cast", 713 | "error-chain", 714 | "image", 715 | "lazy_static 0.2.11", 716 | "log 0.3.9", 717 | "nom", 
718 | "regex", 719 | "safemem", 720 | ] 721 | 722 | [[package]] 723 | name = "void" 724 | version = "1.0.2" 725 | source = "registry+https://github.com/rust-lang/crates.io-index" 726 | checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" 727 | 728 | [[package]] 729 | name = "wasi" 730 | version = "0.7.0" 731 | source = "registry+https://github.com/rust-lang/crates.io-index" 732 | checksum = "b89c3ce4ce14bdc6fb6beaf9ec7928ca331de5df7e5ea278375642a2f478570d" 733 | 734 | [[package]] 735 | name = "webrtc-vad" 736 | version = "0.4.0" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "39a1e40fd6ca90be95459152a2537f2ba4286ee1b13073f7ebcaa74fc94e3008" 739 | dependencies = [ 740 | "cc", 741 | ] 742 | 743 | [[package]] 744 | name = "winapi" 745 | version = "0.3.8" 746 | source = "registry+https://github.com/rust-lang/crates.io-index" 747 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" 748 | dependencies = [ 749 | "winapi-i686-pc-windows-gnu", 750 | "winapi-x86_64-pc-windows-gnu", 751 | ] 752 | 753 | [[package]] 754 | name = "winapi-i686-pc-windows-gnu" 755 | version = "0.4.0" 756 | source = "registry+https://github.com/rust-lang/crates.io-index" 757 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 758 | 759 | [[package]] 760 | name = "winapi-x86_64-pc-windows-gnu" 761 | version = "0.4.0" 762 | source = "registry+https://github.com/rust-lang/crates.io-index" 763 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 764 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "alass-core", 5 | "alass-cli" 6 | ] 7 | 8 | [profile.release] 9 | lto = true 10 | opt-level = 3 -------------------------------------------------------------------------------- /Makefile: 
-------------------------------------------------------------------------------- 1 | package_windows64: 2 | curl https://ffmpeg.zeranoe.com/builds/win64/shared/ffmpeg-4.2-win64-shared.zip -o target/ffmpeg.zip 3 | unzip target/ffmpeg.zip -d target 4 | mv target/ffmpeg-4.2-win64-shared target/ffmpeg 5 | echo 6 | mkdir target/alass-windows64 7 | mkdir target/alass-windows64/ffmpeg 8 | mkdir target/alass-windows64/bin 9 | curl https://www.gnu.org/licenses/gpl-3.0.txt > target/alass-windows64/bin/LICENSE.txt 10 | cp target/ffmpeg/LICENSE.txt target/alass-windows64/ffmpeg/LICENSE.txt 11 | cp target/ffmpeg/README.txt target/alass-windows64/ffmpeg/README.txt 12 | cp -r target/ffmpeg/bin target/alass-windows64/ffmpeg/bin 13 | rm target/alass-windows64/ffmpeg/bin/ffplay.exe 14 | cargo build --release --target x86_64-pc-windows-gnu 15 | cp target/x86_64-pc-windows-gnu/release/alass-cli.exe target/alass-windows64/bin 16 | echo -ne '@echo off\r\nset ALASS_FFMPEG_PATH=%~dp0ffmpeg\\bin\\ffmpeg.exe\r\nset ALASS_FFPROBE_PATH=%~dp0ffmpeg\\bin\\ffprobe.exe\r\n"%~dp0bin\\alass-cli.exe" %*\r\n' > target/alass-windows64/alass.bat 17 | ( cd target; zip -J -r alass-windows64.zip alass-windows64 ) 18 | 19 | 20 | clean_windows64: 21 | rm target/alass-windows64.zip -f 22 | rm target/ffmpeg-4.2-win64-shared.zip -f 23 | rm target/ffmpeg-4.2-win64-shared -rf 24 | rm target/ffmpeg -rf 25 | rm target/alass-windows64 -rf 26 | 27 | package_linux64: 28 | cargo build --release --target x86_64-unknown-linux-musl 29 | cp ./target/x86_64-unknown-linux-musl/release/alass-cli ./target/alass-linux64 30 | -------------------------------------------------------------------------------- /MakefileWindows64: -------------------------------------------------------------------------------- 1 | package_windows64: 2 | curl https://ffmpeg.zeranoe.com/builds/win64/shared/ffmpeg-4.2-win64-shared.zip -o target/ffmpeg.zip 3 | unzip target/ffmpeg.zip -d target 4 | mv target/ffmpeg-4.2-win64-shared target/ffmpeg 5 | echo 
6 | mkdir target/alass-windows64 7 | mkdir target/alass-windows64/ffmpeg 8 | mkdir target/alass-windows64/bin 9 | curl https://www.gnu.org/licenses/gpl-3.0.txt > target/alass-windows64/bin/LICENSE.txt 10 | cp target/ffmpeg/LICENSE.txt target/alass-windows64/ffmpeg/LICENSE.txt 11 | cp target/ffmpeg/README.txt target/alass-windows64/ffmpeg/README.txt 12 | cp -r target/ffmpeg/bin target/alass-windows64/ffmpeg/bin 13 | rm target/alass-windows64/ffmpeg/bin/ffplay.exe 14 | cargo build --release --target x86_64-pc-windows-gnu 15 | cp target/x86_64-pc-windows-gnu/release/alass-cli.exe target/alass-windows64/bin 16 | echo -ne '@echo off\r\nset ALASS_FFMPEG_PATH=%~dp0ffmpeg\\bin\\ffmpeg.exe\r\nset ALASS_FFPROBE_PATH=%~dp0ffmpeg\\bin\\ffprobe.exe\r\n"%~dp0bin\\alass-cli.exe" %*\r\n' > target/alass-windows64/alass.bat 17 | ( cd target; zip -J -r alass-windows64.zip alass-windows64 ) 18 | 19 | 20 | clean_windows64: 21 | rm target/alass-windows64.zip -f 22 | rm target/ffmpeg-4.2-win64-shared.zip -f 23 | rm target/ffmpeg-4.2-win64-shared -rf 24 | rm target/ffmpeg -rf 25 | rm target/alass-windows64 -rf 26 | 27 | package_linux64: 28 | cargo build --release --target x86_64-unknown-linux-musl 29 | cp ./target/x86_64-unknown-linux-musl/release/alass-cli ./target/alass-linux64 30 | 31 | clean_linux64: 32 | rm target/alass-linux64 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | `alass` is a command line tool to synchronize subtitles to movies. 4 | 5 | It can automatically correct 6 | 7 | - constant offsets 8 | - splits due to advertisement breaks, directors cut, ... 9 | - different framerates 10 | 11 | The alignment process is not only fast and 12 | accurate, but also language-agnostic. This means 13 | you can align subtitles to movies in different 14 | languages.
15 | 16 | `alass` stands for "Automatic Language-Agnostic Subtitle Synchronization". The theory and algorithms 17 | are documented in my [bachelor's thesis](documentation/thesis.pdf) 18 | and summarized in my [bachelor's presentation](documentation/slides.pdf). 19 | 20 | 21 | ## Executable for Windows (64-bit) 22 | 23 | Get the latest executable from [here](https://github.com/kaegi/alass/releases)! Just download and extract the archive. The file `alass.bat` is the command line tool. 24 | 25 | ## Executable for Linux (64-bit) 26 | 27 | Get the latest executable from [here](https://github.com/kaegi/alass/releases)! To run the executable, `ffmpeg` and 28 | `ffprobe` have to be installed. 29 | You can change their paths with the environment variables 30 | `ALASS_FFMPEG_PATH` (default `ffmpeg`) and `ALASS_FFPROBE_PATH` (default `ffprobe`). 31 | 32 | ## Usage 33 | 34 | The most basic command is: 35 | 36 | ```bash 37 | $ alass movie.mp4 incorrect_subtitle.srt output.srt 38 | ``` 39 | 40 | You can also use `alass` to align the incorrect subtitle to a different subtitle: 41 | 42 | ```bash 43 | $ alass reference_subtitle.ssa incorrect_subtitle.srt output.srt 44 | ``` 45 | 46 | You can additionally adjust how much the algorithm tries to avoid introducing or removing a break: 47 | 48 | ```bash 49 | # split-penalty is a value between 0 and 1000 (default 7) 50 | $ alass reference_subtitle.ssa incorrect_subtitle.srt output.srt --split-penalty 10 51 | ``` 52 | 53 | Values between 5 and 20 are the most useful. Anything above 20 misses some important splits and anything below 5 introduces many unnecessary splits. 54 | 55 | If you only want to shift the subtitle, without introducing splits, you can use `--no-splits`: 56 | 57 | ```bash 58 | # synchronizing the subtitles in this mode is very fast 59 | $ alass movie.mp4 incorrect_subtitle.srt output.srt --no-splits 60 | ``` 61 | 62 | Currently supported are `.srt`, `.ssa`/`.ass` and `.idx` files. 
Every common video format is supported for the reference file. 63 | 64 | 65 | ## Performance and Results 66 | 67 | The extraction of the audio from a video takes about 10 to 20 seconds. Computing the alignment usually takes between 5 and 10 seconds. 68 | 69 | The alignment is usually perfect - 70 | the percentage of "good subtitles" is about 88% to 98%, depending on how strict you classify a "good subtitle". 71 | Downloading random subtitles 72 | from `OpenSubtitles.org` had an error rate of about 50% 73 | (sample size N=118). 74 | Of all subtitle _lines_ (not subtitle files) in the tested database, 75 | after synchronization 76 | 77 | - 50% were within 50ms of target position 78 | - 80% were within 100ms of target position 79 | - 90% were within 400ms of target position 80 | - 95% were within 800ms of target position 81 | 82 | compared to a (possibly not perfect) reference subtitle. 83 | 84 | ## How to compile the binary 85 | 86 | Install [Rust and Cargo](https://www.rust-lang.org/en-US/install.html) then run: 87 | 88 | ```bash 89 | # this will create the latest release in ~/.cargo/bin/alass-cli 90 | $ cargo install alass-cli 91 | ``` 92 | 93 | 94 | The voice-activity module this project uses is written in C. Therefore a C compiler (`gcc` or `clang`) is needed to compile this project. 95 | 96 | To use `alass-cli` with video files, `ffmpeg` and `ffprobe` have to be installed. They are used to extract the raw audio data. You can set the paths used by `alass` using the environment variables `ALASS_FFMPEG_PATH` (default `ffmpeg`) and `ALASS_FFPROBE_PATH` (default `ffprobe`).
97 | 98 | ### Building from Source 99 | 100 | If you want to build and run the project from source code: 101 | 102 | ```bash 103 | $ git clone https://github.com/kaegi/alass 104 | $ cd alass 105 | $ cargo build 106 | $ cargo run -- movie.mp4 input.srt output.srt 107 | ``` 108 | 109 | ### Configuration 110 | 111 | All parameters shown for `cargo build` can also be used for `cargo install` and `cargo run`. 112 | 113 | #### FFmpeg as a library 114 | 115 | You can also link `ffmpeg` as a dynamic library during compile time. The library implementation can extract the audio about 2 to 3 seconds faster. Unfortunately it is harder to compile, the error handling is only very basic and might still have bugs. 116 | 117 | You have to remove "`# FFMPEG-LIB`" from every line that starts with it in `alass-cli/Cargo.toml`. Then use: 118 | 119 | ```bash 120 | # Important: you have to be inside `alass-cli`! Otherwise the parameters get ignored. 121 | $ cargo build --no-default-features --features ffmpeg-library 122 | ``` 123 | 124 | 125 | ### Alias Setup 126 | 127 | *For Linux users:* It is recommended to add the folder path to your system path as well as setup an alias for `alass` to `alass-cli`. Add this to your `~/.bashrc` (or the setup file of your favorite shell): 128 | 129 | ```bash 130 | export PATH="$PATH:$HOME/.cargo/bin" 131 | alias alass="alass-cli" 132 | ``` 133 | 134 | ## Folder structure 135 | 136 | This `cargo` workspace contains two projects: 137 | 138 | - `alass-core` which provides the algorithm 139 | 140 | It is targeted at *developers* who want to use the same algorithm in their project. 141 | 142 | - `alass-cli` which is the official command line tool 143 | 144 | It is targeted at *end users* who want to correct their subtitles. 145 | 146 | ## Library Documentation 147 | 148 | [Open README](./alass-core/README.md) from `alass-core`. 149 | 150 | ## Notes 151 | 152 | This program was called `aligner` in the past.
This made it nearly impossible to find on a search engine, so `alass` was chosen instead. -------------------------------------------------------------------------------- /alass-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "alass-cli" 3 | version = "2.0.0" 4 | authors = ["kaegi "] 5 | description = "Automatic Language-Agnostic Subtitle Synchronization (Command Line Tool)" 6 | repository = "https://github.com/kaegi/alass" 7 | documentation = "https://docs.rs/alass-cli" 8 | readme = "../README.md" 9 | keywords = ["align", "subtitle", "automatic", "api", "tool"] 10 | license = "GPL-3.0" 11 | edition = "2018" 12 | 13 | [features] 14 | default = ["ffmpeg-binary"] 15 | 16 | # use exactly one of these two features 17 | ffmpeg-binary = ["byteorder"] 18 | # FFMPEG-LIB ffmpeg-library = ["ffmpeg-sys"] 19 | 20 | 21 | [dependencies] 22 | alass-core = { version = "2.0.0", path = "../alass-core" } 23 | webrtc-vad = "0.4.0" 24 | subparse = "0.7.0" 25 | 26 | clap = "2.33.0" 27 | pbr = "1.0.0-alpha.2" 28 | encoding_rs = "0.8.17" 29 | libc = "0.2.60" 30 | failure = "0.1.5" 31 | serde_json = "1.0.40" 32 | serde = { version = "1.0.98", features = ["derive", "rc"] } 33 | byteorder = { version = "1.3.2", optional = true } 34 | 35 | [dev-dependencies] 36 | rmp-serde = "0.14.0" 37 | threadpool = "1.7.1" 38 | ctrlc = "3.1.3" 39 | rand = "0.7.2" 40 | 41 | # FFMPEG-LIB [dependencies.ffmpeg-sys] 42 | # FFMPEG-LIB optional = true 43 | # FFMPEG-LIB git = "https://github.com/meh/rust-ffmpeg-sys" 44 | # FFMPEG-LIB rev = "4f14151b9b8134f1f029d49d02cbea5c7337dedb" 45 | -------------------------------------------------------------------------------- /alass-cli/src/errors.rs: -------------------------------------------------------------------------------- 1 | // This file is part of the Rust library and binary `alass`. 
2 | // 3 | // Copyright (C) 2017 kaegi 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see . 17 | 18 | use failure::{Backtrace, Context, Fail}; 19 | use std::fmt; 20 | use std::path::PathBuf; 21 | use subparse::SubtitleFormat; 22 | 23 | #[macro_export] 24 | macro_rules! define_error { 25 | ($error:ident, $errorKind:ident) => { 26 | #[derive(Debug)] 27 | pub struct $error { 28 | inner: Context<$errorKind>, 29 | } 30 | 31 | impl Fail for $error { 32 | fn name(&self) -> Option<&str> { 33 | self.inner.name() 34 | } 35 | 36 | fn cause(&self) -> Option<&dyn Fail> { 37 | self.inner.cause() 38 | } 39 | 40 | fn backtrace(&self) -> Option<&Backtrace> { 41 | self.inner.backtrace() 42 | } 43 | } 44 | 45 | impl fmt::Display for $error { 46 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 47 | fmt::Display::fmt(&self.inner, f) 48 | } 49 | } 50 | 51 | #[allow(dead_code)] 52 | impl $error { 53 | pub fn kind(&self) -> &$errorKind { 54 | self.inner.get_context() 55 | } 56 | } 57 | 58 | #[allow(dead_code)] 59 | impl $errorKind { 60 | pub fn into_error(self) -> $error { 61 | $error { 62 | inner: Context::new(self), 63 | } 64 | } 65 | } 66 | 67 | impl From<$errorKind> for $error { 68 | fn from(kind: $errorKind) -> $error { 69 | $error { 70 | inner: Context::new(kind), 71 | } 72 | } 73 | } 74 | 75 | impl From> for $error { 76 | fn from(inner: Context<$errorKind>) -> 
$error { 77 | $error { inner: inner } 78 | } 79 | } 80 | }; 81 | } 82 | 83 | define_error!(InputFileError, InputFileErrorKind); 84 | 85 | #[derive(Clone, Eq, PartialEq, Debug, Fail)] 86 | pub enum InputFileErrorKind { 87 | VideoFile(PathBuf), 88 | SubtitleFile(PathBuf), 89 | } 90 | 91 | impl fmt::Display for InputFileErrorKind { 92 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 93 | match self { 94 | InputFileErrorKind::VideoFile(p) => write!(f, "processing video file '{}' failed", p.display()), 95 | InputFileErrorKind::SubtitleFile(p) => write!(f, "processing subtitle file '{}' failed", p.display()), 96 | } 97 | } 98 | } 99 | 100 | define_error!(FileOperationError, FileOperationErrorKind); 101 | 102 | #[derive(Clone, Eq, PartialEq, Debug, Fail)] 103 | pub enum FileOperationErrorKind { 104 | FileOpen { path: PathBuf }, 105 | FileRead { path: PathBuf }, 106 | FileWrite { path: PathBuf }, 107 | } 108 | 109 | impl fmt::Display for FileOperationErrorKind { 110 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 111 | match self { 112 | FileOperationErrorKind::FileOpen { path } => write!(f, "failed to open file '{}'", path.display()), 113 | FileOperationErrorKind::FileRead { path } => write!(f, "failed to read file '{}'", path.display()), 114 | // bugfix: this arm previously printed "failed to read file" (copy-paste from FileRead) 115 | FileOperationErrorKind::FileWrite { path } => write!(f, "failed to write file '{}'", path.display()), 116 | } 117 | } 118 | } 119 | 120 | define_error!(InputVideoError, InputVideoErrorKind); 121 | 122 | #[derive(Clone, Eq, PartialEq, Debug, Fail)] 123 | pub enum InputVideoErrorKind { 124 | FailedToDecode { path: PathBuf }, 125 | VadAnalysisFailed, 126 | } 127 | 128 | impl fmt::Display for InputVideoErrorKind { 129 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 130 | match self { 131 | InputVideoErrorKind::FailedToDecode { path } => { 132 | write!(f, "failed to extract voice segments from file '{}'", path.display()) 133 | } 134 | InputVideoErrorKind::VadAnalysisFailed => write!(f, "failed to analyse audio
segment for voice activity"), 134 | } 135 | } 136 | } 137 | 138 | define_error!(InputSubtitleError, InputSubtitleErrorKind); 139 | 140 | #[derive(Clone, Eq, PartialEq, Debug, Fail)] 141 | pub enum InputSubtitleErrorKind { 142 | ReadingSubtitleFileFailed(PathBuf), 143 | UnknownSubtitleFormat(PathBuf), 144 | ParsingSubtitleFailed(PathBuf), 145 | RetreivingSubtitleLinesFailed(PathBuf), 146 | } 147 | 148 | impl fmt::Display for InputSubtitleErrorKind { 149 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 150 | match self { 151 | InputSubtitleErrorKind::ReadingSubtitleFileFailed(path) => { 152 | write!(f, "reading subtitle file '{}' failed", path.display()) 153 | } 154 | InputSubtitleErrorKind::UnknownSubtitleFormat(path) => { 155 | write!(f, "unknown subtitle format for file '{}'", path.display()) 156 | } 157 | InputSubtitleErrorKind::ParsingSubtitleFailed(path) => { 158 | write!(f, "parsing subtitle file '{}' failed", path.display()) 159 | } 160 | InputSubtitleErrorKind::RetreivingSubtitleLinesFailed(path) => { 161 | write!(f, "retreiving subtitle file '{}' failed", path.display()) 162 | } 163 | } 164 | } 165 | } 166 | 167 | define_error!(InputArgumentsError, InputArgumentsErrorKind); 168 | 169 | #[derive(Clone, PartialEq, Debug, Fail)] 170 | pub enum InputArgumentsErrorKind { 171 | #[fail( 172 | display = "expected value '{}' to be in range '{}'-'{}', found value '{}'", 173 | argument_name, min, max, value 174 | )] 175 | ValueNotInRange { 176 | argument_name: String, 177 | min: f64, 178 | max: f64, 179 | value: f64, 180 | }, 181 | #[fail(display = "expected positive number for '{}', found '{}'", argument_name, value)] 182 | ExpectedPositiveNumber { argument_name: String, value: i64 }, 183 | 184 | #[fail(display = "expected non-negative number for '{}', found '{}'", argument_name, value)] 185 | ExpectedNonNegativeNumber { argument_name: String, value: f64 }, 186 | 187 | #[fail(display = "argument '{}' with value '{}' could not be parsed", argument_name, 
value)] 188 | ArgumentParseError { argument_name: String, value: String }, 189 | } 190 | 191 | define_error!(TopLevelError, TopLevelErrorKind); 192 | 193 | pub enum TopLevelErrorKind { 194 | FileFormatMismatch { 195 | input_file_path: PathBuf, 196 | output_file_path: PathBuf, 197 | input_file_format: SubtitleFormat, 198 | }, 199 | FailedToUpdateSubtitle, 200 | FailedToGenerateSubtitleData, 201 | FailedToInstantiateSubtitleFile, 202 | } 203 | 204 | impl fmt::Display for TopLevelErrorKind { 205 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 206 | match self { 207 | TopLevelErrorKind::FileFormatMismatch { input_file_path, output_file_path, input_file_format } => write!(f, "output file '{}' seems to have a different format than input file '{}' with format '{}' (this program does not perform conversions)", output_file_path.display(), input_file_path.display(), input_file_format.get_name()), 208 | TopLevelErrorKind::FailedToUpdateSubtitle => write!(f, "failed to change lines in the subtitle"), 209 | TopLevelErrorKind::FailedToGenerateSubtitleData => write!(f, "failed to generate data for subtitle"), 210 | TopLevelErrorKind::FailedToInstantiateSubtitleFile => write!(f, "failed to instantiate subtitle file"), 211 | } 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /alass-cli/src/lib.rs: -------------------------------------------------------------------------------- 1 | use alass_core::{TimeDelta as AlgTimeDelta, TimePoint as AlgTimePoint, TimeSpan as AlgTimeSpan}; 2 | use encoding_rs::Encoding; 3 | use failure::ResultExt; 4 | use pbr::ProgressBar; 5 | use std::cmp::{max, min}; 6 | use std::ffi::OsStr; 7 | use std::fs::File; 8 | use std::io::{Read, Write}; 9 | use std::path::{Path, PathBuf}; 10 | use std::result::Result; 11 | 12 | use errors::*; 13 | 14 | pub mod errors; 15 | pub mod video_decoder; 16 | 17 | use subparse::timetypes::*; 18 | use subparse::{get_subtitle_format_err, parse_bytes, SubtitleFile}; 
19 | 20 | pub const PKG_VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); 21 | pub const PKG_NAME: Option<&'static str> = option_env!("CARGO_PKG_NAME"); 22 | pub const PKG_DESCRIPTION: Option<&'static str> = option_env!("CARGO_PKG_DESCRIPTION"); 23 | 24 | /*#[derive(Debug, Clone, PartialEq, Eq, Copy)] 25 | pub enum VideoFileFormat { 26 | /// we don't need to differentiate between video file formats in current code 27 | NotImplemented, 28 | }*/ 29 | 30 | pub struct NoProgressInfo {} 31 | 32 | impl alass_core::ProgressHandler for NoProgressInfo { 33 | fn init(&mut self, _steps: i64) {} 34 | fn inc(&mut self) {} 35 | fn finish(&mut self) {} 36 | } 37 | 38 | impl video_decoder::ProgressHandler for NoProgressInfo { 39 | fn init(&mut self, _steps: i64) {} 40 | fn inc(&mut self) {} 41 | fn finish(&mut self) {} 42 | } 43 | 44 | pub struct ProgressInfo { 45 | init_msg: Option, 46 | prescaler: i64, 47 | counter: i64, 48 | progress_bar: Option>, 49 | } 50 | 51 | impl ProgressInfo { 52 | pub fn new(prescaler: i64, init_msg: Option) -> ProgressInfo { 53 | ProgressInfo { 54 | init_msg: init_msg, 55 | prescaler, 56 | counter: 0, 57 | progress_bar: None, 58 | } 59 | } 60 | } 61 | 62 | impl ProgressInfo { 63 | fn init(&mut self, steps: i64) { 64 | self.progress_bar = Some(ProgressBar::new((steps / self.prescaler) as u64)); 65 | if let Some(init_msg) = &self.init_msg { 66 | println!("{}", init_msg); 67 | } 68 | } 69 | 70 | fn inc(&mut self) { 71 | self.counter = self.counter + 1; 72 | if self.counter == self.prescaler { 73 | self.progress_bar.as_mut().unwrap().inc(); 74 | self.counter = 0; 75 | } 76 | } 77 | 78 | fn finish(&mut self) { 79 | self.progress_bar.as_mut().unwrap().finish_println("\n"); 80 | } 81 | } 82 | 83 | impl alass_core::ProgressHandler for ProgressInfo { 84 | fn init(&mut self, steps: i64) { 85 | self.init(steps) 86 | } 87 | fn inc(&mut self) { 88 | self.inc() 89 | } 90 | fn finish(&mut self) { 91 | self.finish() 92 | } 93 | } 94 | 95 | impl 
video_decoder::ProgressHandler for ProgressInfo { 96 | fn init(&mut self, steps: i64) { 97 | self.init(steps) 98 | } 99 | fn inc(&mut self) { 100 | self.inc() 101 | } 102 | fn finish(&mut self) { 103 | self.finish() 104 | } 105 | } 106 | 107 | pub fn read_file_to_bytes(path: &Path) -> std::result::Result, FileOperationError> { 108 | let mut file = File::open(path).with_context(|_| FileOperationErrorKind::FileOpen { 109 | path: path.to_path_buf(), 110 | })?; 111 | let mut v = Vec::new(); 112 | file.read_to_end(&mut v) 113 | .with_context(|_| FileOperationErrorKind::FileRead { 114 | path: path.to_path_buf(), 115 | })?; 116 | Ok(v) 117 | } 118 | 119 | pub fn write_data_to_file(path: &Path, d: Vec) -> std::result::Result<(), FileOperationError> { 120 | let mut file = File::create(path).with_context(|_| FileOperationErrorKind::FileOpen { 121 | path: path.to_path_buf(), 122 | })?; 123 | file.write_all(&d).with_context(|_| FileOperationErrorKind::FileWrite { 124 | path: path.to_path_buf(), 125 | })?; 126 | Ok(()) 127 | } 128 | 129 | pub fn timing_to_alg_timepoint(t: TimePoint, interval: i64) -> AlgTimePoint { 130 | assert!(interval > 0); 131 | AlgTimePoint::from(t.msecs() / interval) 132 | } 133 | 134 | pub fn alg_delta_to_delta(t: AlgTimeDelta, interval: i64) -> TimeDelta { 135 | assert!(interval > 0); 136 | let time_int: i64 = t.into(); 137 | TimeDelta::from_msecs(time_int * interval) 138 | } 139 | 140 | pub fn timings_to_alg_timespans(v: &[TimeSpan], interval: i64) -> Vec { 141 | v.iter() 142 | .cloned() 143 | .map(|timespan| { 144 | AlgTimeSpan::new_safe( 145 | timing_to_alg_timepoint(timespan.start, interval), 146 | timing_to_alg_timepoint(timespan.end, interval), 147 | ) 148 | }) 149 | .collect() 150 | } 151 | 152 | pub fn alg_deltas_to_timing_deltas(v: &[AlgTimeDelta], interval: i64) -> Vec { 153 | v.iter().cloned().map(|x| alg_delta_to_delta(x, interval)).collect() 154 | } 155 | 156 | /// Groups consecutive timespans with the same delta together. 
157 | pub fn get_subtitle_delta_groups(mut v: Vec<(AlgTimeDelta, TimeSpan)>) -> Vec<(AlgTimeDelta, Vec)> { 158 | v.sort_by_key(|t| min((t.1).start, (t.1).end)); 159 | 160 | let mut result: Vec<(AlgTimeDelta, Vec)> = Vec::new(); 161 | 162 | for (delta, original_timespan) in v { 163 | let mut new_block = false; 164 | 165 | if let Some(last_tuple_ref) = result.last_mut() { 166 | if delta == last_tuple_ref.0 { 167 | last_tuple_ref.1.push(original_timespan); 168 | } else { 169 | new_block = true; 170 | } 171 | } else { 172 | new_block = true; 173 | } 174 | 175 | if new_block { 176 | result.push((delta, vec![original_timespan])); 177 | } 178 | } 179 | 180 | result 181 | } 182 | 183 | pub enum InputFileHandler { 184 | Subtitle(SubtitleFileHandler), 185 | Video(VideoFileHandler), 186 | } 187 | 188 | pub struct SubtitleFileHandler { 189 | file_format: subparse::SubtitleFormat, 190 | subtitle_file: SubtitleFile, 191 | subparse_timespans: Vec, 192 | } 193 | 194 | impl SubtitleFileHandler { 195 | pub fn open_sub_file( 196 | file_path: &Path, 197 | sub_encoding: Option<&'static Encoding>, 198 | sub_fps: f64, 199 | ) -> Result { 200 | let sub_data = read_file_to_bytes(file_path.as_ref()) 201 | .with_context(|_| InputSubtitleErrorKind::ReadingSubtitleFileFailed(file_path.to_path_buf()))?; 202 | 203 | let file_format = get_subtitle_format_err(file_path.extension(), &sub_data) 204 | .with_context(|_| InputSubtitleErrorKind::UnknownSubtitleFormat(file_path.to_path_buf()))?; 205 | 206 | let parsed_subtitle_data: SubtitleFile = parse_bytes(file_format, &sub_data, sub_encoding, sub_fps) 207 | .with_context(|_| InputSubtitleErrorKind::ParsingSubtitleFailed(file_path.to_path_buf()))?; 208 | 209 | let subparse_timespans: Vec = parsed_subtitle_data 210 | .get_subtitle_entries() 211 | .with_context(|_| InputSubtitleErrorKind::RetreivingSubtitleLinesFailed(file_path.to_path_buf()))? 
212 | .into_iter() 213 | .map(|subentry| subentry.timespan) 214 | .map(|timespan: subparse::timetypes::TimeSpan| { 215 | TimeSpan::new(min(timespan.start, timespan.end), max(timespan.start, timespan.end)) 216 | }) 217 | .collect(); 218 | 219 | Ok(SubtitleFileHandler { 220 | file_format: file_format, 221 | subparse_timespans, 222 | subtitle_file: parsed_subtitle_data, 223 | }) 224 | } 225 | 226 | pub fn file_format(&self) -> subparse::SubtitleFormat { 227 | self.file_format 228 | } 229 | 230 | pub fn timespans(&self) -> &[subparse::timetypes::TimeSpan] { 231 | self.subparse_timespans.as_slice() 232 | } 233 | 234 | pub fn into_subtitle_file(self) -> subparse::SubtitleFile { 235 | self.subtitle_file 236 | } 237 | } 238 | 239 | pub struct VideoFileHandler { 240 | //video_file_format: VideoFileFormat, 241 | subparse_timespans: Vec, 242 | //aligner_timespans: Vec, 243 | } 244 | 245 | impl VideoFileHandler { 246 | pub fn from_cache(timespans: Vec) -> VideoFileHandler { 247 | VideoFileHandler { 248 | subparse_timespans: timespans, 249 | } 250 | } 251 | 252 | pub fn open_video_file( 253 | file_path: &Path, 254 | audio_index: Option, 255 | video_decode_progress: impl video_decoder::ProgressHandler, 256 | ) -> Result { 257 | //video_decoder::VideoDecoder::decode(file_path, ); 258 | use webrtc_vad::*; 259 | 260 | struct WebRtcFvad { 261 | fvad: Vad, 262 | vad_buffer: Vec, 263 | } 264 | 265 | impl video_decoder::AudioReceiver for WebRtcFvad { 266 | type Output = Vec; 267 | type Error = InputVideoError; 268 | 269 | fn push_samples(&mut self, samples: &[i16]) -> Result<(), InputVideoError> { 270 | // the chunked audio receiver should only provide 10ms of 8000kHz -> 80 samples 271 | assert!(samples.len() == 80); 272 | 273 | let is_voice = self 274 | .fvad 275 | .is_voice_segment(samples) 276 | .map_err(|_| InputVideoErrorKind::VadAnalysisFailed)?; 277 | 278 | self.vad_buffer.push(is_voice); 279 | 280 | Ok(()) 281 | } 282 | 283 | fn finish(self) -> Result, InputVideoError> { 284 | 
Ok(self.vad_buffer) 285 | } 286 | } 287 | 288 | let vad_processor = WebRtcFvad { 289 | fvad: Vad::new_with_rate(SampleRate::Rate8kHz), 290 | vad_buffer: Vec::new(), 291 | }; 292 | 293 | let chunk_processor = video_decoder::ChunkedAudioReceiver::new(80, vad_processor); 294 | 295 | let vad_buffer = video_decoder::VideoDecoder::decode(file_path, audio_index, chunk_processor, video_decode_progress) 296 | .with_context(|_| InputVideoErrorKind::FailedToDecode { 297 | path: PathBuf::from(file_path), 298 | })?; 299 | 300 | let mut voice_segments: Vec<(i64, i64)> = Vec::new(); 301 | let mut voice_segment_start: i64 = 0; 302 | 303 | let combine_with_distance_lower_than = 0 / 10; 304 | 305 | let mut last_segment_end: i64 = 0; 306 | let mut already_saved_span = true; 307 | 308 | for (i, is_voice_segment) in vad_buffer.into_iter().chain(std::iter::once(false)).enumerate() { 309 | let i = i as i64; 310 | 311 | if is_voice_segment { 312 | last_segment_end = i; 313 | if already_saved_span { 314 | voice_segment_start = i; 315 | already_saved_span = false; 316 | } 317 | } else { 318 | // not a voice segment 319 | if i - last_segment_end >= combine_with_distance_lower_than && !already_saved_span { 320 | voice_segments.push((voice_segment_start, last_segment_end)); 321 | already_saved_span = true; 322 | } 323 | } 324 | } 325 | 326 | let subparse_timespans: Vec = voice_segments 327 | .into_iter() 328 | .map(|(start, end)| { 329 | subparse::timetypes::TimeSpan::new( 330 | subparse::timetypes::TimePoint::from_msecs(start * 10), 331 | subparse::timetypes::TimePoint::from_msecs(end * 10), 332 | ) 333 | }) 334 | .collect(); 335 | 336 | Ok(VideoFileHandler { 337 | //video_file_format: VideoFileFormat::NotImplemented, 338 | subparse_timespans, 339 | }) 340 | } 341 | 342 | pub fn filter_with_min_span_length_ms(&mut self, min_vad_span_length_ms: i64) { 343 | self.subparse_timespans = self 344 | .subparse_timespans 345 | .iter() 346 | .filter(|ts| ts.len() >= 
TimeDelta::from_msecs(min_vad_span_length_ms)) 347 | .cloned() 348 | .collect(); 349 | } 350 | 351 | pub fn timespans(&self) -> &[subparse::timetypes::TimeSpan] { 352 | self.subparse_timespans.as_slice() 353 | } 354 | } 355 | 356 | impl InputFileHandler { 357 | pub fn open( 358 | file_path: &Path, 359 | audio_index: Option, 360 | sub_encoding: Option<&'static Encoding>, 361 | sub_fps: f64, 362 | video_decode_progress: impl video_decoder::ProgressHandler, 363 | ) -> Result { 364 | let known_subitle_endings: [&str; 6] = ["srt", "vob", "idx", "ass", "ssa", "sub"]; 365 | 366 | let extension: Option<&OsStr> = file_path.extension(); 367 | 368 | for &subtitle_ending in known_subitle_endings.iter() { 369 | if extension == Some(OsStr::new(subtitle_ending)) { 370 | return Ok(SubtitleFileHandler::open_sub_file(file_path, sub_encoding, sub_fps) 371 | .map(|v| InputFileHandler::Subtitle(v)) 372 | .with_context(|_| InputFileErrorKind::SubtitleFile(file_path.to_path_buf()))?); 373 | } 374 | } 375 | 376 | return Ok(VideoFileHandler::open_video_file(file_path, audio_index, video_decode_progress) 377 | .map(|v| InputFileHandler::Video(v)) 378 | .with_context(|_| InputFileErrorKind::VideoFile(file_path.to_path_buf()))?); 379 | } 380 | 381 | pub fn into_subtitle_file(self) -> Option { 382 | match self { 383 | InputFileHandler::Video(_) => None, 384 | InputFileHandler::Subtitle(sub_handler) => Some(sub_handler.subtitle_file), 385 | } 386 | } 387 | 388 | pub fn timespans(&self) -> &[subparse::timetypes::TimeSpan] { 389 | match self { 390 | InputFileHandler::Video(video_handler) => video_handler.timespans(), 391 | InputFileHandler::Subtitle(sub_handler) => sub_handler.timespans(), 392 | } 393 | } 394 | 395 | pub fn filter_video_with_min_span_length_ms(&mut self, min_vad_span_length_ms: i64) { 396 | if let InputFileHandler::Video(video_handler) = self { 397 | video_handler.filter_with_min_span_length_ms(min_vad_span_length_ms); 398 | } 399 | } 400 | } 401 | 402 | pub fn guess_fps_ratio( 
403 | ref_spans: &[alass_core::TimeSpan], 404 | in_spans: &[alass_core::TimeSpan], 405 | ratios: &[f64], 406 | mut progress_handler: impl alass_core::ProgressHandler, 407 | ) -> (Option, alass_core::TimeDelta) { 408 | progress_handler.init(ratios.len() as i64); 409 | let (delta, score) = alass_core::align_nosplit( 410 | ref_spans, 411 | in_spans, 412 | alass_core::overlap_scoring, 413 | alass_core::NoProgressHandler, 414 | ); 415 | progress_handler.inc(); 416 | 417 | //let desc = ["25/24", "25/23.976", "24/25", "24/23.976", "23.976/25", "23.976/24"]; 418 | //println!("score 1: {}", score); 419 | 420 | let (mut opt_idx, mut opt_delta, mut opt_score) = (None, delta, score); 421 | 422 | for (scale_factor_idx, scaling_factor) in ratios.iter().cloned().enumerate() { 423 | let stretched_in_spans: Vec = 424 | in_spans.iter().map(|ts| ts.scaled(scaling_factor)).collect(); 425 | 426 | let (delta, score) = alass_core::align_nosplit( 427 | ref_spans, 428 | &stretched_in_spans, 429 | alass_core::overlap_scoring, 430 | alass_core::NoProgressHandler, 431 | ); 432 | progress_handler.inc(); 433 | 434 | //println!("score {}: {}", desc[scale_factor_idx], score); 435 | 436 | if score > opt_score { 437 | opt_score = score; 438 | opt_idx = Some(scale_factor_idx); 439 | opt_delta = delta; 440 | } 441 | } 442 | 443 | progress_handler.finish(); 444 | 445 | (opt_idx, opt_delta) 446 | } 447 | 448 | pub fn print_error_chain(error: failure::Error) { 449 | let show_bt_opt = std::env::vars() 450 | .find(|(key, _)| key == "RUST_BACKTRACE") 451 | .map(|(_, value)| value); 452 | let show_bt = show_bt_opt != None && show_bt_opt != Some("0".to_string()); 453 | 454 | println!("error: {}", error); 455 | if show_bt { 456 | println!("stack trace: {}", error.backtrace()); 457 | } 458 | 459 | for cause in error.as_fail().iter_causes() { 460 | println!("caused by: {}", cause); 461 | if show_bt { 462 | if let Some(backtrace) = cause.backtrace() { 463 | println!("stack trace: {}", backtrace); 464 | } 465 | } 
466 | } 467 | 468 | if !show_bt { 469 | println!(""); 470 | // bugfix: hint message previously started with "not:" instead of "note:" 471 | println!("note: run with environment variable 'RUST_BACKTRACE=1' for detailed stack traces"); 472 | } 473 | } 474 | -------------------------------------------------------------------------------- /alass-cli/src/main.rs: -------------------------------------------------------------------------------- 1 | // This file is part of the Rust library and binary `alass`. 2 | // 3 | // Copyright (C) 2017 kaegi 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see . 17 | 18 | #![allow(unknown_lints)] // for clippy 19 | 20 | // TODO: search for unsafe, panic, unimplemented 21 | 22 | extern crate clap; 23 | extern crate encoding_rs; 24 | extern crate pbr; 25 | extern crate subparse; 26 | 27 | // Alg* stands for algorithm (the internal alass algorithm types) 28 | 29 | use crate::subparse::SubtitleFileInterface; 30 | 31 | use alass_core::{align, TimeDelta as AlgTimeDelta}; 32 | use clap::{App, Arg}; 33 | use encoding_rs::Encoding; 34 | use failure::ResultExt; 35 | use std::ffi::OsStr; 36 | use std::path::PathBuf; 37 | use std::result::Result; 38 | use std::str::FromStr; 39 | 40 | use subparse::timetypes::*; 41 | use subparse::{SubtitleEntry, SubtitleFormat}; 42 | 43 | use alass_cli::errors::*; 44 | use alass_cli::*; 45 | 46 | /// Does reading, parsing and nice error handling for a f64 clap parameter.
47 | fn unpack_clap_number_f64( 48 | matches: &clap::ArgMatches, 49 | parameter_name: &'static str, 50 | ) -> Result { 51 | let paramter_value_str: &str = matches.value_of(parameter_name).unwrap(); 52 | f64::from_str(paramter_value_str) 53 | .with_context(|_| { 54 | InputArgumentsErrorKind::ArgumentParseError { 55 | argument_name: parameter_name.to_string(), 56 | value: paramter_value_str.to_string(), 57 | } 58 | .into() 59 | }) 60 | .map_err(|e| InputArgumentsError::from(e)) 61 | } 62 | 63 | /// Does reading, parsing and nice error handling for a f64 clap parameter. 64 | fn unpack_clap_number_i64( 65 | matches: &clap::ArgMatches, 66 | parameter_name: &'static str, 67 | ) -> Result { 68 | let paramter_value_str: &str = matches.value_of(parameter_name).unwrap(); 69 | i64::from_str(paramter_value_str) 70 | .with_context(|_| { 71 | InputArgumentsErrorKind::ArgumentParseError { 72 | argument_name: parameter_name.to_string(), 73 | value: paramter_value_str.to_string(), 74 | } 75 | .into() 76 | }) 77 | .map_err(|e| InputArgumentsError::from(e)) 78 | } 79 | 80 | fn unpack_optional_clap_number_usize( 81 | matches: &clap::ArgMatches, 82 | parameter_name: &'static str, 83 | ) -> Result, InputArgumentsError> { 84 | 85 | match matches.value_of(parameter_name) { 86 | None => Ok(None), 87 | Some(parameter_value_str) => { 88 | usize::from_str(parameter_value_str) 89 | .with_context(|_| { 90 | InputArgumentsErrorKind::ArgumentParseError { 91 | argument_name: parameter_name.to_string(), 92 | value: parameter_value_str.to_string(), 93 | } 94 | .into() 95 | }) 96 | .map(|v| Some(v)) 97 | .map_err(|e| InputArgumentsError::from(e)) 98 | } 99 | } 100 | } 101 | 102 | pub fn get_encoding(opt: Option<&str>) -> Option<&'static Encoding> { 103 | match opt { 104 | None | Some("auto") => { 105 | // use automatic detection 106 | None 107 | }, 108 | Some(label) => { 109 | match Encoding::for_label_no_replacement(label.as_bytes()) { 110 | None => { 111 | // TODO: error handling 112 | panic!("{} 
is not a known encoding label; exiting.", label); 113 | } 114 | Some(encoding) => Some(encoding), 115 | } 116 | } 117 | } 118 | } 119 | 120 | // ////////////////////////////////////////////////////////////////////////////////////////////////// 121 | 122 | struct Arguments { 123 | reference_file_path: PathBuf, 124 | incorrect_file_path: PathBuf, 125 | output_file_path: PathBuf, 126 | 127 | interval: i64, 128 | 129 | split_penalty: f64, 130 | 131 | sub_fps_inc: f64, 132 | sub_fps_ref: f64, 133 | 134 | allow_negative_timestamps: bool, 135 | 136 | /// having a value of `None` means autodetect encoding 137 | encoding_ref: Option<&'static Encoding>, 138 | encoding_inc: Option<&'static Encoding>, 139 | 140 | guess_fps_ratio: bool, 141 | no_split_mode: bool, 142 | speed_optimization: Option, 143 | 144 | audio_index: Option, 145 | } 146 | 147 | fn parse_args() -> Result { 148 | let matches = App::new(PKG_NAME.unwrap_or("unkown (not compiled with cargo)")) 149 | .version(PKG_VERSION.unwrap_or("unknown (not compiled with cargo)")) 150 | .about(PKG_DESCRIPTION.unwrap_or("unknown (not compiled with cargo)")) 151 | .arg(Arg::with_name("reference-file") 152 | .help("Path to the reference subtitle or video file") 153 | .required(true)) 154 | .arg(Arg::with_name("incorrect-sub-file") 155 | .help("Path to the incorrect subtitle file") 156 | .required(true)) 157 | .arg(Arg::with_name("output-file-path") 158 | .help("Path to corrected subtitle file") 159 | .required(true)) 160 | .arg(Arg::with_name("split-penalty") 161 | .short("p") 162 | .long("split-penalty") 163 | .value_name("floating point number from 0 to 1000") 164 | .help("Determines how eager the algorithm is to avoid splitting of the subtitles. 1000 means that all lines will be shifted by the same offset, while 0.01 will produce MANY segments with different offsets. 
Values from 1 to 20 are the most useful.") 165 | .default_value("7")) 166 | .arg(Arg::with_name("interval") 167 | .short("i") 168 | .long("interval") 169 | .value_name("integer in milliseconds") 170 | .help("The smallest recognized time interval, smaller numbers make the alignment more accurate, greater numbers make aligning faster.") 171 | .default_value("1")) 172 | .arg(Arg::with_name("allow-negative-timestamps") 173 | .short("n") 174 | .long("allow-negative-timestamps") 175 | .help("Negative timestamps can lead to problems with the output file, so by default 0 will be written instead. This option allows you to disable this behavior.")) 176 | .arg(Arg::with_name("sub-fps-ref") 177 | .long("sub-fps-ref") 178 | .value_name("floating-point number in frames-per-second") 179 | .default_value("30") 180 | .help("Specifies the frames-per-second for the accompanying video of MicroDVD `.sub` files (MicroDVD `.sub` files store timing information as frame numbers). Only affects the reference subtitle file.")) 181 | .arg(Arg::with_name("sub-fps-inc") 182 | .long("sub-fps-inc") 183 | .value_name("floating-point number in frames-per-second") 184 | .default_value("30") 185 | .help("Specifies the frames-per-second for the accompanying video of MicroDVD `.sub` files (MicroDVD `.sub` files store timing information as frame numbers). 
Only affects the incorrect subtitle file.")) 186 | .arg(Arg::with_name("encoding-ref") 187 | .long("encoding-ref") 188 | .value_name("encoding") 189 | .help("Charset encoding of the reference subtitle file.") 190 | .default_value("auto")) 191 | .arg(Arg::with_name("encoding-inc") 192 | .long("encoding-inc") 193 | .value_name("encoding") 194 | .help("Charset encoding of the incorrect subtitle file.") 195 | .default_value("auto")) 196 | .arg(Arg::with_name("speed-optimization") 197 | .long("speed-optimization") 198 | .short("O") 199 | .value_name("path") 200 | .default_value("1") 201 | .help("(greatly) speeds up synchronization by sacrificing some accuracy; set to 0 to disable speed optimization") 202 | .required(false) 203 | ) 204 | .arg(Arg::with_name("statistics-required-tag") 205 | .long("statistics-required-tag") 206 | .short("t") 207 | .value_name("tag") 208 | .help("only output statistics containing this tag (you can find the tags in statistics file)") 209 | .required(false) 210 | ) 211 | .arg(Arg::with_name("no-split") 212 | .help("synchronize subtitles without looking for splits/breaks - this mode is much faster") 213 | .short("l") 214 | .long("no-split") 215 | ) 216 | .arg(Arg::with_name("disable-fps-guessing") 217 | .help("disables guessing and correcting of framerate differences between reference file and input file") 218 | .short("g") 219 | .long("disable-fps-guessing") 220 | .alias("disable-framerate-guessing") 221 | ) 222 | .arg(Arg::with_name("audio-index") 223 | .help("specifies the audio index in the reference video file") 224 | .long("index") 225 | .value_name("audio-index") 226 | .required(false) 227 | ) 228 | .after_help("This program works with .srt, .ass/.ssa, .idx and .sub files. 
The corrected file will have the same format as the incorrect file.") 229 | .get_matches(); 230 | 231 | let reference_file_path: PathBuf = matches.value_of("reference-file").unwrap().into(); 232 | let incorrect_file_path: PathBuf = matches.value_of("incorrect-sub-file").unwrap().into(); 233 | let output_file_path: PathBuf = matches.value_of("output-file-path").unwrap().into(); 234 | 235 | let interval: i64 = unpack_clap_number_i64(&matches, "interval")?; 236 | if interval < 1 { 237 | return Err(InputArgumentsErrorKind::ExpectedPositiveNumber { 238 | argument_name: "interval".to_string(), 239 | value: interval, 240 | } 241 | .into()); 242 | } 243 | 244 | let split_penalty: f64 = unpack_clap_number_f64(&matches, "split-penalty")?; 245 | if split_penalty < 0.0 || split_penalty > 1000.0 { 246 | return Err(InputArgumentsErrorKind::ValueNotInRange { 247 | argument_name: "interval".to_string(), 248 | value: split_penalty, 249 | min: 0.0, 250 | max: 1000.0, 251 | } 252 | .into()); 253 | } 254 | 255 | let speed_optimization: f64 = unpack_clap_number_f64(&matches, "speed-optimization")?; 256 | if split_penalty < 0.0 { 257 | return Err(InputArgumentsErrorKind::ExpectedNonNegativeNumber { 258 | argument_name: "speed-optimization".to_string(), 259 | value: speed_optimization, 260 | } 261 | .into()); 262 | } 263 | 264 | let no_split_mode: bool = matches.is_present("no-split"); 265 | 266 | Ok(Arguments { 267 | reference_file_path, 268 | incorrect_file_path, 269 | output_file_path, 270 | interval, 271 | split_penalty, 272 | sub_fps_ref: unpack_clap_number_f64(&matches, "sub-fps-ref")?, 273 | sub_fps_inc: unpack_clap_number_f64(&matches, "sub-fps-inc")?, 274 | allow_negative_timestamps: matches.is_present("allow-negative-timestamps"), 275 | encoding_ref: get_encoding(matches.value_of("encoding-ref")), 276 | encoding_inc: get_encoding(matches.value_of("encoding-inc")), 277 | no_split_mode, 278 | guess_fps_ratio: !matches.is_present("disable-fps-guessing"), 279 | speed_optimization: 
if speed_optimization <= 0. { 280 | None 281 | } else { 282 | Some(speed_optimization) 283 | }, 284 | audio_index: unpack_optional_clap_number_usize(&matches, "audio-index")? 285 | }) 286 | } 287 | 
// Opens the reference file (subtitle or video) and drops voice spans shorter than 500 ms.
288 | fn prepare_reference_file(args: &Arguments) -> Result { 289 | let mut ref_file = InputFileHandler::open( 290 | &args.reference_file_path, 291 | args.audio_index, 292 | args.encoding_ref, 293 | args.sub_fps_ref, 294 | ProgressInfo::new( 295 | 500, 296 | Some(format!( 297 | "extracting audio from reference file '{}'...", 298 | args.reference_file_path.display() 299 | )), 300 | ), 301 | )?; 302 | 303 | ref_file.filter_video_with_min_span_length_ms(500); 304 | 305 | Ok(ref_file) 306 | } 307 | 308 | // ////////////////////////////////////////////////////////////////////////////////////////////////// 309 | 
// Top-level program logic; errors bubble up to main() for display.
310 | fn run() -> Result<(), failure::Error> { 311 | let args = parse_args()?; 312 | 313 | if args.incorrect_file_path.eq(OsStr::new("_")) { 314 | // DEBUG MODE FOR REFERENCE FILE WAS ACTIVATED 315 | let ref_file = prepare_reference_file(&args)?; 316 | 317 | println!("input file path was given as '_'"); 318 | println!("the output file is a .srt file only containing timing information from the reference file"); 319 | println!("this can be used as a debugging tool"); 320 | println!(); 321 | 322 | let lines: Vec<(subparse::timetypes::TimeSpan, String)> = ref_file 323 | .timespans() 324 | .iter() 325 | .cloned() 326 | .enumerate() 327 | .map(|(i, time_span)| (time_span, format!("line {}", i))) 328 | .collect(); 329 | 330 | let debug_file = 331 | subparse::SrtFile::create(lines).with_context(|_| TopLevelErrorKind::FailedToInstantiateSubtitleFile)?; 332 | 333 | write_data_to_file( 334 | &args.output_file_path, 335 | debug_file.to_data().unwrap(), // TODO(review): handle to_data() error instead of unwrap 336 | )?; 337 | 338 | return Ok(()); 339 | } 340 | 341 | // open incorrect file before the reference file so that incorrect-file-not-found-errors are not displayed after the long audio extraction 342 | let 
inc_file = 343 | SubtitleFileHandler::open_sub_file(args.incorrect_file_path.as_path(), args.encoding_inc, args.sub_fps_inc)?; 344 | 345 | let ref_file = prepare_reference_file(&args)?; 346 | 347 | let output_file_format = inc_file.file_format(); 348 | 349 | // this program internally stores the files in a non-destructive way (so 350 | // formatting is preserved) but has no ability to convert between formats 351 | if !subparse::is_valid_extension_for_subtitle_format(args.output_file_path.extension(), output_file_format) { 352 | return Err(TopLevelErrorKind::FileFormatMismatch { 353 | input_file_path: args.incorrect_file_path, 354 | output_file_path: args.output_file_path, 355 | input_file_format: inc_file.file_format(), 356 | } 357 | .into_error() 358 | .into()); 359 | } 360 | 361 | let mut inc_aligner_timespans: Vec = 362 | timings_to_alg_timespans(inc_file.timespans(), args.interval); 363 | let ref_aligner_timespans: Vec = 364 | timings_to_alg_timespans(ref_file.timespans(), args.interval); 365 | 
// Tries the six pairwise ratios of the common frame rates 25/24/23.976 and keeps the best match.
366 | let mut fps_scaling_factor = 1.; 367 | if args.guess_fps_ratio { 368 | let a = 25.; 369 | let b = 24.; 370 | let c = 23.976; 371 | let ratios = [a / b, a / c, b / a, b / c, c / a, c / b]; 372 | let desc = ["25/24", "25/23.976", "24/25", "24/23.976", "23.976/25", "23.976/24"]; 373 | 374 | let (opt_ratio_idx, _) = guess_fps_ratio( 375 | &ref_aligner_timespans, 376 | &inc_aligner_timespans, 377 | &ratios, 378 | ProgressInfo::new(1, Some("Guessing framerate ratio...".to_string())), 379 | ); 380 | 381 | fps_scaling_factor = if let Some(idx) = opt_ratio_idx { ratios[idx] } else { 1. 
}; 382 | 383 | println!( 384 | "info: 'reference file FPS/input file FPS' ratio is {}", 385 | if let Some(idx) = opt_ratio_idx { desc[idx] } else { "1" } 386 | ); 387 | println!(); 388 | 389 | inc_aligner_timespans = inc_aligner_timespans 390 | .into_iter() 391 | .map(|x| x.scaled(fps_scaling_factor)) 392 | .collect(); 393 | } 394 | 395 | let align_start_msg = format!( 396 | "synchronizing '{}' to reference file '{}'...", 397 | args.incorrect_file_path.display(), 398 | args.reference_file_path.display() 399 | ); 400 | let alg_deltas; 401 | if args.no_split_mode { 
// NOTE(review): local "num_inc_timespancs" is misspelled; renaming is a code change, so it is only flagged here.
402 | let num_inc_timespancs = inc_aligner_timespans.len(); 403 | 404 | let alg_delta = alass_core::align_nosplit( 405 | &ref_aligner_timespans, 406 | &inc_aligner_timespans, 407 | alass_core::standard_scoring, 408 | ProgressInfo::new(1, Some(align_start_msg)), 409 | ) 410 | .0; 411 | 412 | alg_deltas = std::vec::from_elem(alg_delta, num_inc_timespancs); 413 | } else { 414 | alg_deltas = align( 415 | &ref_aligner_timespans, 416 | &inc_aligner_timespans, 417 | args.split_penalty, 418 | args.speed_optimization, 419 | alass_core::standard_scoring, 420 | ProgressInfo::new(1, Some(align_start_msg)), 421 | ) 422 | .0; 423 | } 424 | let deltas = alg_deltas_to_timing_deltas(&alg_deltas, args.interval); 425 | 426 | // group subtitle lines which have the same offset 427 | let shift_groups: Vec<(AlgTimeDelta, Vec)> = get_subtitle_delta_groups( 428 | alg_deltas 429 | .iter() 430 | .cloned() 431 | .zip(inc_file.timespans().iter().cloned()) 432 | .collect(), 433 | ); 434 | 435 | for (shift_group_delta, shift_group_lines) in shift_groups { 436 | // computes the first and last timestamp for all lines with that delta 437 | // -> that way we can provide the user with information like 438 | // "100 subtitles with 10min length" 439 | let min = shift_group_lines 440 | .iter() 441 | .map(|subline| subline.start) 442 | .min() 443 | .expect("a subtitle group should have at least one subtitle line"); 444 | let max = 
shift_group_lines 445 | .iter() 446 | .map(|subline| subline.start) 447 | .max() 448 | .expect("a subtitle group should have at least one subtitle line"); 449 | 450 | println!( 451 | "shifted block of {} subtitles with length {} by {}", 452 | shift_group_lines.len(), 453 | max - min, 454 | alg_delta_to_delta(shift_group_delta, args.interval) 455 | ); 456 | } 457 | 458 | println!(); 459 | 460 | if ref_file.timespans().is_empty() { 461 | println!("warn: reference file has no subtitle lines"); 462 | println!(); 463 | } 464 | if inc_file.timespans().is_empty() { 465 | println!("warn: file with incorrect subtitles has no lines"); 466 | println!(); 467 | } 468 | 
// Applies the detected FPS ratio to both endpoints of a span (nested helper).
469 | fn scaled_timespan(ts: TimeSpan, fps_scaling_factor: f64) -> TimeSpan { 470 | TimeSpan::new( 471 | TimePoint::from_msecs((ts.start.msecs() as f64 * fps_scaling_factor) as i64), 472 | TimePoint::from_msecs((ts.end.msecs() as f64 * fps_scaling_factor) as i64), 473 | ) 474 | } 475 | 
// bugfix: the closure pattern had been corrupted to "×pan" by a bad encoding round-trip; restored to (&timespan, &delta).
476 | let mut corrected_timespans: Vec = inc_file 477 | .timespans() 478 | .iter() 479 | .zip(deltas.iter()) 480 | .map(|(&timespan, &delta)| scaled_timespan(timespan, fps_scaling_factor) + delta) 481 | .collect(); 482 | 483 | if corrected_timespans.iter().any(|ts| ts.start.is_negative()) { 484 | println!("warn: some subtitles now have negative timings, which can cause invalid subtitle files"); 485 | if args.allow_negative_timestamps { 486 | println!( 487 | "warn: negative timestamps will be written to file, because you passed '-n' or '--allow-negative-timestamps'", 488 | ); 489 | } else { 490 | println!( 491 | "warn: negative subtitles will therefore be moved to the start of the subtitle file by default; pass '-n' or '--allow-negative-timestamps' to disable this behavior", 492 | ); 493 | 494 | for corrected_timespan in &mut corrected_timespans { 495 | if corrected_timespan.start.is_negative() { 496 | let offset = subparse::timetypes::TimePoint::from_secs(0) - corrected_timespan.start; 497 | corrected_timespan.start = 
corrected_timespan.start + offset; 498 | corrected_timespan.end = corrected_timespan.end + offset; 499 | } 500 | } 501 | } 502 | println!(); 503 | } 504 | 505 | // .idx only has start timepoints (the subtitle is shown until the next subtitle starts) - so retiming with gaps might 506 | // produce errors 507 | if output_file_format == SubtitleFormat::VobSubIdx { 508 | println!("warn: writing to an '.idx' file can lead to unexpected results due to restrictions of this format"); 509 | } 510 | 511 | // incorrect file -> correct file 512 | let shifted_timespans: Vec = corrected_timespans 513 | .into_iter() 514 | .map(|timespan| SubtitleEntry::from(timespan)) 515 | .collect(); 516 | 517 | // write corrected files 518 | let mut correct_file = inc_file.into_subtitle_file(); 519 | correct_file 520 | .update_subtitle_entries(&shifted_timespans) 521 | .with_context(|_| TopLevelErrorKind::FailedToUpdateSubtitle)?; 522 | 523 | write_data_to_file( 524 | &args.output_file_path, 525 | correct_file 526 | .to_data() 527 | .with_context(|_| TopLevelErrorKind::FailedToGenerateSubtitleData)?, 528 | )?; 529 | 530 | Ok(()) 531 | } 532 | 533 | // ////////////////////////////////////////////////////////////////////////////////////////////////// 534 | 
// Entry point: maps run()'s Result onto process exit codes 0/1.
535 | fn main() { 536 | match run() { 537 | Ok(_) => std::process::exit(0), 538 | Err(error) => { 539 | print_error_chain(error); 540 | std::process::exit(1) 541 | } 542 | } 543 | } 544 | -------------------------------------------------------------------------------- /alass-cli/src/video_decoder/ffmpeg_binary.rs: -------------------------------------------------------------------------------- 1 | use failure::{Backtrace, Context, Fail, ResultExt}; 2 | use std::ffi::OsString; 3 | use std::fmt; 4 | use std::io::Read; 5 | use std::path::{Path, PathBuf}; 6 | use std::process::Child; 7 | use std::process::{ChildStdout, Command, Output, Stdio}; 8 | use std::str::from_utf8; 9 | 10 | use byteorder::ByteOrder; 11 | use serde::{Deserialize, 
Deserializer}; 12 | 13 | use crate::define_error; 14 | 
// Stream kind reported by ffprobe's "codec_type" JSON field; unknown labels are kept verbatim in Other.
15 | #[derive(Debug, PartialEq, Eq)] 16 | pub enum CodecType { 17 | Audio, 18 | Video, 19 | Subtitle, 20 | Other(String), 21 | } 22 | 23 | impl<'de> Deserialize<'de> for CodecType { 24 | fn deserialize>(d: D) -> Result { 25 | let s = String::deserialize(d)?; 26 | match &s[..] { 27 | "audio" => Ok(CodecType::Audio), 28 | "video" => Ok(CodecType::Video), 29 | "subtitle" => Ok(CodecType::Subtitle), 30 | s => Ok(CodecType::Other(s.to_owned())), 31 | } 32 | } 33 | } 34 | 35 | #[derive(Debug, Deserialize)] 36 | struct Stream { 37 | pub index: usize, 38 | pub codec_long_name: String, 39 | pub channels: Option, 40 | /// `.mkv` does not store the duration in the streams; we have to use `format -> duration` instead 41 | pub duration: Option, 42 | pub codec_type: CodecType, 43 | } 44 | 45 | #[derive(Debug, Deserialize)] 46 | struct Format { 47 | pub duration: Option, 48 | } 49 | 50 | /// Metadata associated with a video. 51 | #[derive(Debug, Deserialize)] 52 | struct Metadata { 53 | streams: Vec, 54 | format: Option, 55 | } 56 | 57 | define_error!(DecoderError, DecoderErrorKind); 58 | 
// Failure conditions of the external-binary decoder (ffprobe/ffmpeg subprocesses).
59 | #[derive(Debug, Fail)] 60 | pub enum DecoderErrorKind { 61 | FailedToDecodeVideoStreamInfo, 62 | ExtractingMetadataFailed { 63 | cmd_path: PathBuf, 64 | file_path: PathBuf, 65 | args: Vec, 66 | }, 67 | NoAudioStream { 68 | path: PathBuf, 69 | }, 70 | FailedExtractingAudio { 71 | file_path: PathBuf, 72 | cmd_path: PathBuf, 73 | args: Vec, 74 | }, 75 | FailedSpawningSubprocess { 76 | path: PathBuf, 77 | args: Vec, 78 | }, 79 | WaitingForProcessFailed { 80 | cmd_path: PathBuf, 81 | }, 82 | ProcessErrorCode { 83 | cmd_path: PathBuf, 84 | code: Option, 85 | }, 86 | ProcessErrorMessage { 87 | msg: String, 88 | }, 89 | DeserializingMetadataFailed { 90 | path: PathBuf, 91 | }, 92 | ReadError, 93 | FailedToParseDuration { 94 | s: String, 95 | }, 96 | AudioSegmentProcessingFailed, 97 | NoDurationInformation, 98 | } 99 | 100 | fn 
format_cmd(cmd_path: &PathBuf, args: &[OsString]) -> String { 101 | let args_string: String = args 102 | .iter() 103 | .map(|x| format!("{}", x.to_string_lossy())) 104 | .collect::>() 105 | .join(" "); 106 | format!("{} {}", cmd_path.display(), args_string) 107 | } 108 | 109 | impl fmt::Display for DecoderErrorKind { 110 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 111 | match self { 112 | DecoderErrorKind::FailedToDecodeVideoStreamInfo => write!(f, "failed to decode video stream info"), 113 | DecoderErrorKind::DeserializingMetadataFailed { path } => { 114 | write!(f, "failed to deserialize metadata of file '{}'", path.display()) 115 | } 116 | DecoderErrorKind::NoAudioStream { path } => write!(f, "no audio stream in file '{}'", path.display()), 117 | DecoderErrorKind::FailedExtractingAudio { 118 | file_path, 119 | cmd_path, 120 | args, 121 | } => write!( 122 | f, 123 | "failed to extract audio from '{}' with '{}' ", 124 | file_path.display(), 125 | format_cmd(cmd_path, args) 126 | ), 127 | DecoderErrorKind::FailedSpawningSubprocess { path, args } => { 128 | write!(f, "failed to spawn subprocess '{}' ", format_cmd(path, args)) 129 | } 130 | DecoderErrorKind::WaitingForProcessFailed { cmd_path } => { 131 | write!(f, "failed to check status of subprocess '{}'", cmd_path.display()) 132 | } 133 | DecoderErrorKind::ProcessErrorCode { cmd_path, code } => write!( 134 | f, 135 | "process '{}' returned error code '{}'", 136 | cmd_path.display(), 137 | code.map(|x| x.to_string()) 138 | .unwrap_or_else(|| String::from("interrupted?")) 139 | ), 140 | DecoderErrorKind::ProcessErrorMessage { msg } => write!(f, "stderr: {}", msg), 141 | DecoderErrorKind::ExtractingMetadataFailed { 142 | file_path, 143 | cmd_path, 144 | args, 145 | } => write!( 146 | f, 147 | "failed to extract metadata from '{}' using command '{}'", 148 | file_path.display(), 149 | format_cmd(cmd_path, args) 150 | ), 151 | DecoderErrorKind::ReadError => write!(f, "error while reading stdout"), 152 
| DecoderErrorKind::FailedToParseDuration { s } => { 153 | write!(f, "failed to parse duration string '{}' from metadata", s) 154 | } 155 | DecoderErrorKind::AudioSegmentProcessingFailed => write!(f, "processing audio segment failed"), 156 | DecoderErrorKind::NoDurationInformation => write!(f, "no audio duration information found"), 157 | } 158 | } 159 | } 160 | 161 | trait IntoOk { 162 | fn into_ok(self) -> Result; 163 | } 164 | impl IntoOk for T { 165 | fn into_ok(self) -> Result { 166 | Ok(self) 167 | } 168 | } 169 | 170 | pub struct VideoDecoderFFmpegBinary {} 171 | 172 | static PROGRESS_PRESCALER: i64 = 200; 173 | 174 | impl VideoDecoderFFmpegBinary { 175 | /// Samples are pushed in 8kHz mono/single-channel format. 176 | pub fn decode( 177 | file_path: impl AsRef, 178 | audio_index: Option, 179 | receiver: impl super::AudioReceiver, 180 | mut progress_handler: impl super::ProgressHandler, 181 | ) -> Result { 182 | let file_path_buf: PathBuf = file_path.as_ref().into(); 183 | 184 | let args = vec![ 185 | OsString::from("-v"), 186 | OsString::from("error"), 187 | OsString::from("-show_entries"), 188 | OsString::from("format=duration:stream=index,codec_long_name,channels,duration,codec_type"), 189 | OsString::from("-of"), 190 | OsString::from("json"), 191 | OsString::from(file_path.as_ref()), 192 | ]; 193 | 194 | let ffprobe_path: PathBuf = std::env::var_os("ALASS_FFPROBE_PATH") 195 | .unwrap_or(OsString::from("ffprobe")) 196 | .into(); 197 | 198 | let metadata: Metadata = 199 | Self::get_metadata(file_path_buf.clone(), ffprobe_path.clone(), &args).with_context(|_| { 200 | DecoderErrorKind::ExtractingMetadataFailed { 201 | file_path: file_path_buf.clone(), 202 | cmd_path: ffprobe_path.clone(), 203 | args: args, 204 | } 205 | })?; 206 | 207 | let mut audio_streams = metadata 208 | .streams 209 | .into_iter() 210 | .filter(|s| s.codec_type == CodecType::Audio && s.channels.is_some()); 211 | 212 | let best_stream_opt = match audio_index { 213 | None => audio_streams 
// no explicit index: pick the audio stream with the fewest channels (cheapest to resample)
214 | .min_by_key(|s| s.channels.unwrap()), 215 | Some(ai) => audio_streams 216 | .find(|s| s.index == ai) 217 | }; 218 | 219 | let best_stream: Stream; 220 | match best_stream_opt { 221 | Some(x) => best_stream = x, 222 | None => { 223 | return Err(DecoderError::from(DecoderErrorKind::NoAudioStream { 224 | path: file_path.as_ref().into(), 225 | })) 226 | } 227 | } 228 | 229 | let ffmpeg_path: PathBuf = std::env::var_os("ALASS_FFMPEG_PATH") 230 | .unwrap_or(OsString::from("ffmpeg")) 231 | .into(); 232 | 233 | let args: Vec = vec![ 234 | // only print errors 235 | OsString::from("-v"), 236 | OsString::from("error"), 237 | // "yes" -> disables user interaction 238 | OsString::from("-y"), 239 | // input file 240 | OsString::from("-i"), 241 | file_path.as_ref().into(), 242 | // select stream 243 | OsString::from("-map"), 244 | format!("0:{}", best_stream.index).into(), 245 | // audio codec: 16-bit signed little endian 246 | OsString::from("-acodec"), 247 | OsString::from("pcm_s16le"), 248 | // resample to 8khz 249 | OsString::from("-ar"), 250 | OsString::from("8000"), 251 | // resample to single channel 252 | OsString::from("-ac"), 253 | OsString::from("1"), 254 | // output 16-bit signed little endian stream directly (no wav, etc.) 
// (the raw s16le byte stream is read back from stdout and parsed pairwise below)
255 | OsString::from("-f"), 256 | OsString::from("s16le"), 257 | // output to stdout pipe 258 | OsString::from("-"), 259 | ]; 260 | 261 | let format_opt: Option = metadata.format; 262 | 263 | // `.mkv` containers do not store duration info in streams, only the format information does contain it 264 | let duration_str = best_stream 265 | .duration 266 | .or_else(|| format_opt.and_then(|format| format.duration)) 267 | .ok_or_else(|| DecoderError::from(DecoderErrorKind::NoDurationInformation))?; 268 | 269 | let duration = duration_str 270 | .parse::() 271 | .with_context(|_| DecoderErrorKind::FailedToParseDuration { s: duration_str })?; 272 | 273 | let num_samples: i64 = (duration * 8000.0) as i64 / PROGRESS_PRESCALER; 274 | 275 | progress_handler.init(num_samples); 276 | 277 | return Self::extract_audio_stream(receiver, progress_handler, ffmpeg_path.clone(), &args) 278 | .with_context(|_| DecoderErrorKind::FailedExtractingAudio { 279 | file_path: file_path_buf.clone(), 280 | cmd_path: ffmpeg_path.clone(), 281 | args: args, 282 | })? 
// NOTE(review): extract_audio_stream below allocates a 200 MiB stdout read buffer — confirm this is intended.
283 | .into_ok(); 284 | } 285 | 286 | fn extract_audio_stream( 287 | mut receiver: impl super::AudioReceiver, 288 | mut progress_handler: impl super::ProgressHandler, 289 | ffmpeg_path: PathBuf, 290 | args: &[OsString], 291 | ) -> Result { 292 | let mut ffmpeg_process: Child = Command::new(ffmpeg_path.clone()) 293 | .args(args) 294 | .stdin(Stdio::null()) 295 | .stderr(Stdio::piped()) 296 | .stdout(Stdio::piped()) 297 | .spawn() 298 | .with_context(|_| DecoderErrorKind::FailedSpawningSubprocess { 299 | path: ffmpeg_path.clone(), 300 | args: args.to_vec(), 301 | })?; 302 | 303 | let mut stdout: ChildStdout = ffmpeg_process.stdout.take().unwrap(); 304 | 305 | enum ParserState { 306 | Start, 307 | SingleByte(u8), 308 | } 309 | 310 | let mut data: Vec = std::vec::from_elem(0, 200 * 1024 * 1024); 311 | let data2_cap = 1024 * 1024; 312 | let mut data2: Vec = Vec::with_capacity(data2_cap); 313 | let mut parser_state: ParserState = ParserState::Start; 314 | let mut progress_prescaler_counter = 0; 315 | 316 | loop { 317 | // improves performance by allowing ffmpeg to generate more data in pipe 318 | // TODO: an async tokio read might also have the same effect (without being as machine dependent) 319 | // -> too low: does not do anything (+some overhead) 320 | // -> too high: slows down computation because ffmpeg has to wait for this process to read 321 | //std::thread::sleep(Duration::from_nanos(1000)); 322 | 323 | let read_bytes = stdout.read(&mut data).with_context(|_| DecoderErrorKind::ReadError)?; 324 | //println!("{}", read_bytes); 325 | 326 | if read_bytes == 0 { 327 | match ffmpeg_process 328 | .wait() 329 | .with_context(|_| DecoderErrorKind::WaitingForProcessFailed { 330 | cmd_path: ffmpeg_path.clone(), 331 | })? 
// a zero exit code flushes remaining samples; any other exit surfaces ffmpeg's stderr as the error
332 | .code() 333 | { 334 | Some(0) => { 335 | receiver 336 | .push_samples(&data2) 337 | .with_context(|_| DecoderErrorKind::AudioSegmentProcessingFailed)?; 338 | data2.clear(); 339 | progress_handler.finish(); 340 | return Ok(receiver 341 | .finish() 342 | .with_context(|_| DecoderErrorKind::AudioSegmentProcessingFailed)?); 343 | } 344 | code @ Some(_) | code @ None => { 345 | let error_code_err: DecoderErrorKind = DecoderErrorKind::ProcessErrorCode { 346 | cmd_path: ffmpeg_path, 347 | code: code, 348 | }; 349 | 350 | let mut stderr_data = Vec::new(); 351 | ffmpeg_process 352 | .stderr 353 | .unwrap() 354 | .read_to_end(&mut stderr_data) 355 | .with_context(|_| DecoderErrorKind::ReadError)?; 356 | 357 | let stderr_str: String = String::from_utf8_lossy(&stderr_data).into(); 358 | 359 | if stderr_str.is_empty() { 360 | return Err(error_code_err.into()); 361 | } else { 362 | return Err(DecoderError::from(DecoderErrorKind::ProcessErrorMessage { 363 | msg: stderr_str, 364 | })) 365 | .with_context(|_| error_code_err) 366 | .map_err(|x| DecoderError::from(x)); 367 | } 368 | } 369 | } 370 | } 371 | 372 | for &byte in &data[0..read_bytes] { 373 | match parser_state { 374 | ParserState::Start => parser_state = ParserState::SingleByte(byte), 375 | ParserState::SingleByte(last_byte) => { 376 | let two_bytes = [last_byte, byte]; 377 | let sample = byteorder::LittleEndian::read_i16(&two_bytes); 378 | receiver 379 | .push_samples(&[sample]) 380 | .with_context(|_| DecoderErrorKind::AudioSegmentProcessingFailed)?; 381 | 382 | if progress_prescaler_counter == PROGRESS_PRESCALER { 383 | progress_handler.inc(); 384 | progress_prescaler_counter = 0; 385 | } 386 | 387 | progress_prescaler_counter = progress_prescaler_counter + 1; 388 | 389 | /*data2.push(sample); 390 | if data2.len() == data2_cap { 391 | receiver.push_samples(&data2); 392 | data2.clear(); 393 | }*/ 394 | parser_state = ParserState::Start; 395 | } 396 | } 397 | } 398 | } 399 | } 400 | 401 | fn get_metadata(file_path: 
// Runs ffprobe with JSON output and deserializes it into Metadata.
PathBuf, ffprobe_path: PathBuf, args: &[OsString]) -> Result { 402 | let ffprobe_process: Output = Command::new(ffprobe_path.clone()) 403 | .args(args) 404 | .stdin(Stdio::null()) 405 | .stderr(Stdio::piped()) 406 | .stdout(Stdio::piped()) 407 | .output() 408 | .with_context(|_| DecoderErrorKind::FailedSpawningSubprocess { 409 | path: ffprobe_path.clone(), 410 | args: args.to_vec(), 411 | })?; 412 | 413 | if !ffprobe_process.status.success() { 414 | let stderr: String = String::from_utf8_lossy(&ffprobe_process.stderr) 415 | .to_string() 416 | .trim_end() 417 | .to_string(); 418 | 419 | let err = DecoderErrorKind::ProcessErrorCode { 420 | cmd_path: ffprobe_path.clone(), 421 | code: ffprobe_process.status.code(), 422 | }; 423 | 424 | if stderr.is_empty() { 425 | return Err(DecoderError::from(err)); 426 | } else { 427 | return Err(DecoderError::from(DecoderErrorKind::ProcessErrorMessage { 428 | msg: stderr, 429 | })) 430 | .with_context(|_| err) 431 | .map_err(|x| DecoderError::from(x)); 432 | } 433 | } 434 | 435 | let stdout = 436 | from_utf8(&ffprobe_process.stdout).with_context(|_| DecoderErrorKind::FailedToDecodeVideoStreamInfo)?; 437 | 438 | let metadata: Metadata = serde_json::from_str(stdout) 439 | .with_context(|_| DecoderErrorKind::DeserializingMetadataFailed { path: file_path })?; 440 | 441 | Ok(metadata) 442 | } 443 | } 444 | -------------------------------------------------------------------------------- /alass-cli/src/video_decoder/ffmpeg_library.rs: -------------------------------------------------------------------------------- 1 | use failure::{Backtrace, Context, Fail}; 2 | use ffmpeg_sys::*; 3 | use std::convert::TryInto; 4 | use std::ffi::{CStr, CString, OsString}; 5 | use std::fmt; 6 | use std::path::{Path, PathBuf}; 7 | use std::ptr::null_mut; 8 | 9 | use crate::define_error; 10 | 11 | fn av_err2str(errnum: libc::c_int) -> String { 12 | let mut err_buffer: [libc::c_char; 256] = [0; 256]; 13 | unsafe { 
// mirrors ffmpeg's av_err2str() macro: formats errnum into a human-readable message
av_make_error_string(err_buffer.as_mut_ptr() as *mut i8, err_buffer.len(), errnum); 15 | CStr::from_ptr(&err_buffer as *const libc::c_char) 16 | .to_string_lossy() 17 | .to_string() 18 | } 19 | } 20 | 21 | define_error!(DecoderError, DecoderErrorKind); 22 | 23 | #[derive(Debug, Fail)] 24 | pub(crate) enum DecoderErrorKind {} 25 | 26 | fn format_cmd(cmd_path: &PathBuf, args: &[OsString]) -> String { 27 | let args_string: String = args 28 | .iter() 29 | .map(|x| format!("{}", x.to_string_lossy())) 30 | .collect::>() 31 | .join(" "); 32 | format!("{} {}", cmd_path.display(), args_string) 33 | } 34 | 35 | impl fmt::Display for DecoderErrorKind { 36 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 37 | unimplemented!() 38 | } 39 | } 40 | 41 | pub struct VideoDecoderFFmpegLibrary {} 42 | 43 | impl VideoDecoderFFmpegLibrary { 44 | /// Samples are pushed in 8kHz mono/single-channel format. 45 | pub(crate) fn decode( 46 | file_path: impl AsRef, 47 | audio_index: Option, 48 | mut receiver: impl super::AudioReceiver, 49 | mut progress_handler: impl super::ProgressHandler, 50 | ) -> Result { 51 | unsafe { 52 | let mut format_context: *mut AVFormatContext = avformat_alloc_context(); 53 | 54 | let file_path_: String = file_path.as_ref().to_string_lossy().into_owned(); 55 | 56 | let result: libc::c_int; 57 | 58 | result = avformat_open_input( 59 | &mut format_context as *mut *mut AVFormatContext, 60 | file_path_.as_bytes().as_ptr() as *const i8, 61 | null_mut(), 62 | null_mut(), 63 | ); 64 | 65 | if result < 0 { 66 | // TODO 67 | panic!( 68 | "Failed to open media file '{}': {}", 69 | file_path.as_ref().display(), 70 | av_err2str(result) 71 | ); 72 | } 73 | 74 | avformat_find_stream_info(format_context, null_mut()); 75 | 76 | let streams: &[*mut AVStream] = 77 | std::slice::from_raw_parts((*format_context).streams, (*format_context).nb_streams as usize); 78 | 79 | let mut audio_stream_opt: Option<*mut AVStream> = None; 80 | 81 | for &stream in streams { 82 | // If 
audio_index is set, ignore the 'least amount of channels' heuristic 83 | if let Some(ai) = audio_index { 84 | if ai == (*stream).index { 85 | audio_stream_opt = Some(stream); 86 | } else { 87 | continue 88 | } 89 | } 90 | 91 | let local_codec_parameters: *mut AVCodecParameters = (*stream).codecpar; 92 | 93 | if (*local_codec_parameters).codec_type == AVMediaType::AVMEDIA_TYPE_AUDIO { 94 | // choose the audio stream with the least amount of channels (it can be resampled faster) 95 | if let Some(saved_audio_stream) = audio_stream_opt { 96 | if (*(*saved_audio_stream).codecpar).channels > (*local_codec_parameters).channels { 97 | audio_stream_opt = Some(stream); 98 | } 99 | } else { 100 | audio_stream_opt = Some(stream); 101 | } 102 | } 103 | } 104 | 105 | if audio_stream_opt.is_none() { 106 | /* TODO */ 107 | panic!("no audio stream found"); 108 | } 109 | let audio_stream = audio_stream_opt.unwrap(); 110 | 111 | let local_codec_parameters: *mut AVCodecParameters = (*audio_stream).codecpar; 112 | 113 | let local_codec: *mut AVCodec = avcodec_find_decoder((*local_codec_parameters).codec_id); 114 | //let local_codec_name: &CStr = CStr::from_ptr((*local_codec).long_name); 115 | 116 | /*println!( 117 | "Audio Codec '{}': {} channels, sample rate {}", 118 | local_codec_name.to_string_lossy(), 119 | (*local_codec_parameters).channels, 120 | (*local_codec_parameters).sample_rate 121 | );*/ 122 | 123 | let codec_context: *mut AVCodecContext = avcodec_alloc_context3(local_codec as *const AVCodec); 124 | avcodec_parameters_to_context(codec_context, local_codec_parameters); 125 | avcodec_open2(codec_context, local_codec, null_mut()); 126 | 127 | let _av_opt_set_int = |swr: *mut SwrContext, name: &str, val: i64, search_flag: libc::c_int| { 128 | av_opt_set_int( 129 | swr as *mut libc::c_void, 130 | CString::new(name).unwrap().into_raw(), 131 | val, 132 | search_flag, 133 | ) 134 | }; 135 | 136 | let _av_opt_set_int = |swr: *mut SwrContext, name: &str, val: i64, search_flag: 
libc::c_int| { 137 | av_opt_set_int( 138 | swr as *mut libc::c_void, 139 | CString::new(name).unwrap().into_raw(), 140 | val, 141 | search_flag, 142 | ) 143 | }; 144 | 145 | let _av_opt_set_sample_fmt = 146 | |obj: *mut SwrContext, name: &str, fmt: AVSampleFormat, search_flags: libc::c_int| -> libc::c_int { 147 | av_opt_set_sample_fmt( 148 | obj as *mut libc::c_void, 149 | CString::new(name).unwrap().into_raw(), 150 | fmt, 151 | search_flags, 152 | ) 153 | }; 154 | 155 | let in_channel_layout = (*codec_context).channel_layout.try_into().unwrap(); 156 | let in_channel_count: i64 = (*codec_context).channels.try_into().unwrap(); 157 | let in_sample_rate: i64 = (*codec_context).sample_rate.try_into().unwrap(); 158 | let in_sample_format = (*codec_context).sample_fmt; 159 | 160 | let out_channel_count = 1; 161 | let out_channel_layout = AV_CH_LAYOUT_MONO.try_into().unwrap(); 162 | let out_sample_rate = 8000; 163 | let out_sample_format = AVSampleFormat::AV_SAMPLE_FMT_S16P; 164 | 165 | // prepare resampler 166 | let swr: *mut SwrContext = swr_alloc(); 167 | _av_opt_set_int(swr, "in_channel_count", in_channel_count, 0); 168 | _av_opt_set_int(swr, "in_channel_layout", in_channel_layout, 0); 169 | _av_opt_set_int(swr, "in_sample_rate", in_sample_rate, 0); 170 | _av_opt_set_sample_fmt(swr, "in_sample_fmt", in_sample_format, 0); 171 | 172 | _av_opt_set_int(swr, "out_channel_count", out_channel_count, 0); 173 | _av_opt_set_int(swr, "out_channel_layout", out_channel_layout, 0); 174 | _av_opt_set_int(swr, "out_sample_rate", out_sample_rate, 0); 175 | _av_opt_set_sample_fmt(swr, "out_sample_fmt", out_sample_format, 0); 176 | 177 | swr_init(swr); 178 | if swr_is_initialized(swr) == 0 { 179 | unimplemented!(); 180 | //pri(stderr, "Resampler has not been properly initialized\n"); 181 | //return -1; 182 | } 183 | 184 | /* compute the number of converted samples: buffering is avoided 185 | * ensuring that the output buffer will contain at least all the 186 | * converted input samples 
*/ 187 | let src_nb_samples = 1024; // this is just a guess... 188 | let mut max_out_samples: i32 = 189 | av_rescale_rnd(src_nb_samples, out_sample_rate, in_sample_rate, AVRounding::AV_ROUND_UP) as i32; 190 | 191 | let mut buffer: *mut i16 = null_mut(); 192 | av_samples_alloc( 193 | &mut buffer as *mut *mut i16 as *mut *mut u8, 194 | null_mut(), 195 | out_channel_count as i32, 196 | max_out_samples, 197 | out_sample_format, 198 | 0, 199 | ); 200 | 201 | let packet: *mut AVPacket = av_packet_alloc(); 202 | let frame: *mut AVFrame = av_frame_alloc(); 203 | 204 | progress_handler.init((*audio_stream).nb_frames); 205 | 206 | while av_read_frame(format_context, packet) >= 0 { 207 | //println!("read frame {:?}", packet); 208 | 209 | if (*packet).stream_index != (*audio_stream).index { 210 | continue; 211 | } 212 | 213 | progress_handler.inc(); 214 | 215 | //println!("stream fits"); 216 | 217 | let mut response = avcodec_send_packet(codec_context, packet); 218 | if response < 0 { 219 | panic!("{}", av_err2str(response)); 220 | } 221 | 222 | loop { 223 | //println!("begin receive_frame"); 224 | response = avcodec_receive_frame(codec_context, frame); 225 | //println!("end receive_frame"); 226 | 227 | if response == AVERROR(EAGAIN) || response == AVERROR_EOF { 228 | break; 229 | } else if response < 0 { 230 | panic!("Error: {}", av_err2str(response)); 231 | } 232 | 233 | //let out_samples = av_rescale_rnd(swr_get_delay(swr, 48000) + in_samples, 44100, 48000, AV_ROUND_UP); 234 | let out_sample_count = swr_get_out_samples(swr, (*frame).nb_samples); 235 | 236 | // Resize output buffer to allow all samples (without buffering) to be stored. 
237 | if out_sample_count > max_out_samples { 238 | max_out_samples = out_sample_count; 239 | av_freep(&mut buffer as *mut *mut i16 as *mut libc::c_void); 240 | av_samples_alloc( 241 | &mut buffer as *mut *mut i16 as *mut *mut u8, 242 | null_mut(), 243 | out_channel_count as i32, 244 | max_out_samples, 245 | out_sample_format, 246 | 0, 247 | ); 248 | } 249 | 250 | // resample frames 251 | let frame_count = swr_convert( 252 | swr, 253 | &mut buffer as *mut *mut i16 as *mut *mut u8, 254 | out_sample_count, 255 | (*frame).data.as_mut_ptr() as *mut *const u8, 256 | (*frame).nb_samples, 257 | ); 258 | 259 | //println!("Samples: {} Predicted: {} Frames: {}", (*frame).nb_samples, out_sample_count, frame_count); 260 | let out_slice = std::slice::from_raw_parts_mut(buffer, frame_count as usize); 261 | 262 | receiver.push_samples(out_slice); 263 | 264 | /*for v in out_slice { 265 | println!("{}", v); 266 | }*/ 267 | 268 | //println!("Frame count: {}", frame_count); 269 | 270 | //println!("freep done"); 271 | } 272 | 273 | av_packet_unref(packet); 274 | } 275 | 276 | av_freep(&mut buffer as *mut *mut i16 as *mut libc::c_void); 277 | 278 | avformat_free_context(format_context); 279 | // TODO: cleanup everything 280 | } 281 | 282 | progress_handler.finish(); 283 | 284 | Ok(receiver.finish()) 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /alass-cli/src/video_decoder/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "ffmpeg-library")] 2 | mod ffmpeg_library; 3 | 4 | #[cfg(feature = "ffmpeg-library")] 5 | pub use ffmpeg_library::VideoDecoderFFmpegLibrary as VideoDecoder; 6 | 7 | #[cfg(feature = "ffmpeg-binary")] 8 | mod ffmpeg_binary; 9 | 10 | #[cfg(feature = "ffmpeg-binary")] 11 | pub use ffmpeg_binary::VideoDecoderFFmpegBinary as VideoDecoder; 12 | 13 | pub trait AudioReceiver { 14 | type Output; 15 | type Error: failure::Fail; 16 | 17 | /// Samples are in 8000kHz 
mono/single-channel format. 18 | fn push_samples(&mut self, samples: &[i16]) -> Result<(), Self::Error>; 19 | 20 | fn finish(self) -> Result; 21 | } 22 | 23 | pub struct ChunkedAudioReceiver { 24 | buffer: Vec, 25 | filled: usize, 26 | next: R, 27 | } 28 | 29 | impl ChunkedAudioReceiver { 30 | pub fn new(size: usize, next: R) -> ChunkedAudioReceiver { 31 | ChunkedAudioReceiver { 32 | buffer: std::vec::from_elem(0, size), 33 | filled: 0, 34 | next, 35 | } 36 | } 37 | } 38 | 39 | impl AudioReceiver for ChunkedAudioReceiver { 40 | type Output = R::Output; 41 | type Error = R::Error; 42 | 43 | fn push_samples(&mut self, mut samples: &[i16]) -> Result<(), R::Error> { 44 | assert!(self.buffer.len() > self.filled); 45 | 46 | loop { 47 | if samples.is_empty() { 48 | break; 49 | } 50 | 51 | let sample_count = std::cmp::min(self.buffer.len() - self.filled, samples.len()); 52 | self.buffer[self.filled..self.filled + sample_count].clone_from_slice(&samples[..sample_count]); 53 | 54 | samples = &samples[sample_count..]; 55 | 56 | self.filled = self.filled + sample_count; 57 | 58 | if self.filled == self.buffer.len() { 59 | self.next.push_samples(self.buffer.as_slice())?; 60 | self.filled = 0; 61 | } 62 | } 63 | 64 | Ok(()) 65 | } 66 | 67 | fn finish(self) -> Result { 68 | self.next.finish() 69 | } 70 | } 71 | 72 | /// Use this trait if you want more detailed information about the progress of operations. 73 | pub trait ProgressHandler { 74 | /// Will be called one time before `inc()` is called. `steps` is the 75 | /// number of times `inc()` will be called. 76 | /// 77 | /// The number of steps is around the number of lines in the "incorrect" subtitle. 78 | /// Be aware that this number can be zero! 79 | #[allow(unused_variables)] 80 | fn init(&mut self, steps: i64) {} 81 | 82 | /// We made (small) progress! 83 | fn inc(&mut self) {} 84 | 85 | /// Will be called after the last `inc()`, when `inc()` was called `steps` times. 
86 | fn finish(&mut self) {} 87 | } 88 | 89 | /*struct NoProgressHandler {} 90 | impl ProgressHandler for NoProgressHandler {}*/ 91 | -------------------------------------------------------------------------------- /alass-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "alass-core" 3 | version = "2.0.0" 4 | authors = ["kaegi "] 5 | description = "Automatic Language-Agnostic Subtitle Synchronization (Library)" 6 | repository = "https://github.com/kaegi/alass/alass-core" 7 | documentation = "https://docs.rs/alass-core" 8 | readme = "README.md" 9 | keywords = ["subtitle", "align", "automatic", "api", "tool"] 10 | license = "GPL-3.0" 11 | edition = "2018" 12 | 13 | [features] 14 | default = [] 15 | 16 | # In nosplit mode, the most expensive operation is sorting of sorted 17 | # vectors. In runtime analysis, using a heap-sort-like algorithm 18 | # is more performant than assuming a large unsorted array. In 19 | # tests it is only half as fast. (1s vs 2s) 20 | nosplit-heap-sort = [] 21 | 22 | [dependencies] 23 | 24 | [dev-dependencies] 25 | rand = "0.7" 26 | -------------------------------------------------------------------------------- /alass-core/README.md: -------------------------------------------------------------------------------- 1 | # alass-core 2 | 3 | This Rust library contains the core algorithm for `alass`, the "Automatic Language-Agnostic Subtitle Sychronization" tool. If you want to go to the command line tool instead, please click [here](https://github.com/kaegi/alass). 4 | 5 | 6 | ## How to use the library 7 | Add this to your `Cargo.toml`: 8 | 9 | ```toml 10 | [dependencies] 11 | alass-core = "2.0.0" 12 | ``` 13 | 14 | The library only contains one function that takes two sequences of time spans and returns the offsets to get the best possible alignment. 
15 | 16 | [Documentation](https://docs.rs/alass-core) 17 | 18 | [Crates.io](https://crates.io/crates/alass-core) 19 | 20 | ### Documentation 21 | 22 | For much more information, please see the workspace information [here](https://github.com/kaegi/alass). -------------------------------------------------------------------------------- /alass-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This file is part of the Rust library and binary `alass`. 2 | // 3 | // Copyright (C) 2017 kaegi 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 | 18 | #![deny( 19 | //missing_docs, 20 | missing_debug_implementations, 21 | //missing_copy_implementations, 22 | trivial_casts, 23 | //unsafe_code, 24 | unstable_features, 25 | unused_import_braces, 26 | unused_qualifications 27 | )] 28 | #![allow(unknown_lints)] // for clippy 29 | 30 | //! `alass` takes two timespan arrays (e.g. from two subtitle files) and 31 | //! tries to align the `incorrect` subtitles 32 | //! to the `reference` subtitle. It automatically fixes offsets and 33 | //! introduces/removes breaks between subtitles in the `incorrect` 34 | //! subtitle to achieve the best alignment. 
35 | 36 | #[cfg(test)] 37 | extern crate rand; 38 | 39 | mod alass; 40 | mod rating_type; 41 | #[allow(dead_code)] 42 | mod segments; 43 | mod time_types; 44 | mod timespan_ops; 45 | 46 | use crate::alass::Aligner; 47 | pub use crate::alass::NoProgressHandler; 48 | pub use crate::alass::ProgressHandler; 49 | use crate::rating_type::{Rating, RatingDelta, RatingExt}; 50 | pub use crate::time_types::{TimeDelta, TimePoint, TimeSpan}; 51 | use crate::timespan_ops::prepare_time_spans; 52 | use std::cmp::{max, min}; 53 | 54 | fn denormalize_split_penalty(ref_list_len: usize, in_list_len: usize, split_penalty_normalized: f64) -> RatingDelta { 55 | RatingDelta::convert_from_f64(min(ref_list_len, in_list_len) as f64 * split_penalty_normalized / 1000.0) 56 | } 57 | 58 | pub type Score = f64; 59 | 60 | /// This score is 1 for equally length spans and lower the more the spans are unequal in length (use this scoring if you're not sure what to take). 61 | pub fn standard_scoring(a: TimeDelta, b: TimeDelta) -> Score { 62 | let min: f64 = min(a, b).as_f64(); 63 | let max: f64 = max(a, b).as_f64(); 64 | min / max 65 | } 66 | 67 | /// Calculate score based only on the overlapping length of the 68 | /// intervals (better when comparing scaled subtitles; used for FPS correction). 69 | pub fn overlap_scoring(a: TimeDelta, b: TimeDelta) -> Score { 70 | let min: f64 = min(a, b).as_f64(); 71 | min * 0.00001 72 | } 73 | 74 | /// Matches an `incorrect` subtitle list to a `reference` subtitle list with only a single constant shift (no split). 75 | /// 76 | /// Returns the delta for every time span in list. 77 | /// 78 | /// This function takes usually less than 300ms on 2h30min subtitle data. 79 | /// 80 | /// Use `standard_scoring` as score function if no fine tuning is required. 
81 | pub fn align_nosplit( 82 | reference: &[TimeSpan], 83 | list: &[TimeSpan], 84 | score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy, 85 | mut progress_handler: impl ProgressHandler, 86 | ) -> (TimeDelta, Score) { 87 | progress_handler.init(1); 88 | 89 | let (ref_nonoverlapping, _) = prepare_time_spans(reference); 90 | let (list_nonoverlapping, _) = prepare_time_spans(list); 91 | 92 | if list_nonoverlapping.is_empty() || ref_nonoverlapping.is_empty() { 93 | return (TimeDelta::zero(), 0.); 94 | } 95 | 96 | // get deltas for non-overlapping timespans 97 | let (delta, score) = Aligner::align_constant_delta(&ref_nonoverlapping, &list_nonoverlapping, score_fn); 98 | progress_handler.inc(); 99 | progress_handler.finish(); 100 | 101 | return (delta, score.as_readable_f64()); 102 | } 103 | 104 | /// Matches an `incorrect` subtitle list to a `reference` subtitle list. 105 | /// 106 | /// Returns the delta for every time span in list. 107 | /// 108 | /// The `split_penalty_normalized` is a value between 109 | /// 0 and 1000. Providing 0 will make the algorithm indifferent of splitting lines (resulting in MANY 110 | /// different deltas), so this is not recommended. Providing 1000 will assure that no split will occur, 111 | /// so only one/the best offset is applied to ALL lines. The most common useful values are in the 112 | /// 4 to 20 range (optimum 7+-1). 113 | /// 114 | /// Especially for larger subtitles (e.g. 1 hour in millisecond resolution and 1000 subtitle lines) this 115 | /// process might take some seconds. To provide user feedback one can pass a `ProgressHandler` to 116 | /// this function. 117 | /// 118 | /// If you want to increase the speed of the alignment process, you can use the `speed_optimization` 119 | /// parameter. This value can be between `0` and `+inf`, although after `10` the accuracy 120 | /// will have greatly degraded. It is recommended to supply a value around `3`. 
121 | /// 122 | /// Use `standard_scoring` as score function if no fine tuning is required. 123 | pub fn align( 124 | reference: &[TimeSpan], 125 | list: &[TimeSpan], 126 | split_penalty: f64, 127 | speed_optimization: Option, 128 | score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy, 129 | progress_handler: impl ProgressHandler, 130 | ) -> (Vec, f64) { 131 | let (list_nonoverlapping, list_indices) = prepare_time_spans(&list); 132 | let (ref_nonoverlapping, _) = prepare_time_spans(&reference); 133 | 134 | if list_nonoverlapping.is_empty() || ref_nonoverlapping.is_empty() { 135 | return (vec![TimeDelta::zero(); list.len()], 0.); 136 | } 137 | 138 | let nosplit_bonus = denormalize_split_penalty(ref_nonoverlapping.len(), list_nonoverlapping.len(), split_penalty); 139 | 140 | // get deltas for non-overlapping timespans 141 | let (deltas, score) = Aligner::align_with_splits( 142 | &ref_nonoverlapping, 143 | &list_nonoverlapping, 144 | nosplit_bonus, 145 | speed_optimization, 146 | score_fn, 147 | progress_handler, 148 | ); 149 | 150 | // get deltas for overlapping timespan-list 151 | ( 152 | list_indices.into_iter().map(|i| deltas[i]).collect(), 153 | score.as_readable_f64(), 154 | ) 155 | } 156 | 157 | /// Calculate the split score (see thesis in repository of source code). 
158 | pub fn get_split_rating( 159 | ref_spans: &[TimeSpan], 160 | in_spans: &[TimeSpan], 161 | offets: &[TimeDelta], 162 | split_penalty: f64, 163 | score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy, 164 | ) -> Score { 165 | let mut total_rating = get_nosplit_rating_iter(ref_spans.iter().cloned(), in_spans.iter().cloned(), score_fn); 166 | 167 | let nosplit_bonus = denormalize_split_penalty(ref_spans.len(), in_spans.len(), split_penalty); 168 | 169 | total_rating = Rating::add_mul_usize( 170 | total_rating, 171 | -nosplit_bonus, 172 | offets 173 | .iter() 174 | .cloned() 175 | .zip(offets.iter().skip(1).cloned()) 176 | .filter(|(o1, o2)| o1 != o2) 177 | .count(), 178 | ); 179 | 180 | total_rating.as_readable_f64() 181 | } 182 | 183 | /// Calculate the no-split score (see thesis in repository of source code). 184 | pub fn get_nosplit_score( 185 | ref_spans: impl Iterator, 186 | in_spans: impl Iterator, 187 | score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy, 188 | ) -> Score { 189 | get_nosplit_rating_iter(ref_spans, in_spans, score_fn).as_readable_f64() 190 | } 191 | 192 | fn get_nosplit_rating_iter( 193 | mut ref_spans: impl Iterator, 194 | mut in_spans: impl Iterator, 195 | score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy, 196 | ) -> Rating { 197 | let mut total_rating = Rating::zero(); 198 | 199 | let mut ref_span; 200 | let mut in_span; 201 | 202 | let ref_span_opt = ref_spans.next(); 203 | match ref_span_opt { 204 | None => return total_rating, 205 | Some(v) => ref_span = v, 206 | } 207 | 208 | let in_span_opt = in_spans.next(); 209 | match in_span_opt { 210 | None => return total_rating, 211 | Some(v) => in_span = v, 212 | } 213 | 214 | loop { 215 | let rating = Rating::from_timespans(ref_span, in_span, score_fn); 216 | total_rating += rating; 217 | 218 | if ref_span.end() <= in_span.end() { 219 | let ref_span_opt = ref_spans.next(); 220 | match ref_span_opt { 221 | None => return total_rating, 222 | Some(v) => ref_span = v, 223 | } 224 | } else 
{ 225 | let in_span_opt = in_spans.next(); 226 | match in_span_opt { 227 | None => return total_rating, 228 | Some(v) => in_span = v, 229 | } 230 | } 231 | } 232 | } 233 | 234 | #[cfg(test)] 235 | mod tests { 236 | use super::*; 237 | use crate::{prepare_time_spans, TimePoint}; 238 | use rand; 239 | use rand::RngCore; 240 | 241 | /// Some special time span sequences. 242 | fn predefined_time_spans() -> Vec> { 243 | let t0 = TimePoint::from(0); 244 | let t1000 = TimePoint::from(1000); 245 | let t2000 = TimePoint::from(2000); 246 | vec![ 247 | vec![], 248 | vec![TimeSpan::new(t0, t0)], 249 | vec![TimeSpan::new(t0, t1000)], 250 | vec![TimeSpan::new(t0, t1000), TimeSpan::new(t1000, t1000)], 251 | vec![ 252 | TimeSpan::new(t0, t1000), 253 | TimeSpan::new(t1000, t1000), 254 | TimeSpan::new(t1000, t2000), 255 | ], 256 | vec![TimeSpan::new(t1000, t1000), TimeSpan::new(t1000, t1000)], 257 | ] 258 | } 259 | 260 | /// Generate random time span sequences 261 | fn generate_random_time_spans() -> Vec { 262 | let mut rng = rand::thread_rng(); 263 | 264 | let len: usize = (rng.next_u32() % 400) as usize; 265 | let mut v = Vec::with_capacity(len); 266 | let mut current_pos = 0i64; 267 | for _ in 0..len { 268 | current_pos += (rng.next_u32() % 200) as i64 - 50; 269 | let current_len = (rng.next_u32() % 400) as i64; 270 | v.push(TimeSpan::new( 271 | TimePoint::from(current_pos), 272 | TimePoint::from(current_pos + current_len), 273 | )); 274 | } 275 | 276 | v 277 | } 278 | 279 | /// All test time span sequences (some are predefined some are random). 280 | pub fn get_test_time_spans() -> Vec> { 281 | (0..1000) 282 | .map(|_| generate_random_time_spans()) 283 | .chain(predefined_time_spans().into_iter()) 284 | .collect() 285 | } 286 | 287 | /// All test time span sequences (some are predefined some are random). 
288 | pub fn get_random_prepared_test_time_spans() -> Vec { 289 | prepare_time_spans(&generate_random_time_spans()).0 290 | } 291 | } 292 | -------------------------------------------------------------------------------- /alass-core/src/rating_type.rs: -------------------------------------------------------------------------------- 1 | // This file is part of the Rust library and binary `alass`. 2 | // 3 | // Copyright (C) 2017 kaegi 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see . 
17 | 18 | pub use rating_i64::*; 19 | 20 | /*mod rating_f64 { 21 | use crate::{TimeDelta, TimeSpan}; 22 | use ordered_float::NotNan; 23 | use std::cmp::{max, min}; 24 | 25 | // these objects determine the precision/length of the rating (i32/i64) - lower 26 | // values take less space and time, higher values have higher precision 27 | pub type Rating = NotNan; 28 | pub type RatingDelta = NotNan; 29 | pub type RatingDeltaDelta = NotNan; 30 | 31 | pub trait RatingExt { 32 | #[inline] 33 | fn add_mul(r: Rating, rd: RatingDelta, td: TimeDelta) -> Rating { 34 | r + rd * td.as_f64() 35 | } 36 | 37 | #[inline] 38 | fn add_mul_usize(r: Rating, rd: RatingDelta, td: usize) -> Rating { 39 | r + rd * td as f64 40 | } 41 | 42 | #[inline] 43 | fn compute(a: TimeDelta, b: TimeDelta) -> Rating { 44 | let min: f64 = min(a, b).as_f64(); 45 | let max: f64 = max(a, b).as_f64(); 46 | NotNan::from(min / max) 47 | } 48 | 49 | /*#[inline] 50 | fn compute(a: TimeDelta, b: TimeDelta) -> i64 { 51 | let min: i64 = min(a, b).as_i64(); 52 | let max: i64 = max(a, b).as_i64(); 53 | (min * RATING_PRECISION) / max 54 | }*/ 55 | 56 | #[inline] 57 | fn compute2(a: TimeDelta, b: TimeDelta) -> Rating { 58 | //Self::compute(a, b) 59 | Self::compute(a, b) 60 | } 61 | 62 | #[inline] 63 | fn from_timespans(a: TimeSpan, b: TimeSpan) -> Rating { 64 | let overlap = TimeSpan::get_overlapping_length(a, b).as_f64(); 65 | let max_rating = Rating::compute2(a.len(), b.len()); 66 | let length_normalization_factor = min(a.len(), b.len()).as_f64(); 67 | 68 | NotNan::from(max_rating * overlap / length_normalization_factor) 69 | } 70 | 71 | #[inline] 72 | fn nosplit_bonus(unnormalized: f64) -> Rating { 73 | NotNan::from(unnormalized) 74 | } 75 | 76 | #[inline] 77 | fn convert_from_f64(v: f64) -> Rating { 78 | NotNan::from(v) 79 | } 80 | 81 | #[inline] 82 | fn zero() -> Rating { 83 | NotNan::from(0.) 
84 | } 85 | 86 | #[inline] 87 | fn is_zero(self) -> bool; 88 | 89 | #[inline] 90 | fn as_f32(self) -> f32; 91 | 92 | #[inline] 93 | fn as_f64(self) -> f64; 94 | 95 | #[inline] 96 | fn as_readable_f32(self) -> f32; 97 | 98 | } 99 | impl RatingExt for Rating { 100 | #[inline] 101 | fn is_zero(self) -> bool { 102 | const EPSILON: f64 = 0.0000001f64; 103 | self.into_inner() > -EPSILON && self.into_inner() < EPSILON 104 | } 105 | 106 | #[inline] 107 | fn as_f32(self) -> f32 { 108 | self.into_inner() as f32 109 | } 110 | 111 | #[inline] 112 | fn as_f64(self) -> f64 { 113 | self.into_inner() 114 | } 115 | 116 | #[inline] 117 | fn as_readable_f32(self) -> f32 { 118 | self.into_inner() as f32 119 | } 120 | } 121 | 122 | pub trait RatingDeltaExt { 123 | #[inline] 124 | fn compute_rating_delta(a: TimeDelta, b: TimeDelta) -> RatingDelta { 125 | let min: NotNan = NotNan::from(min(a, b).as_f64()); 126 | Rating::compute(a, b) / min 127 | //Rating::compute(a, b) / min(a, b).as_i64() 128 | } 129 | } 130 | impl RatingDeltaExt for RatingDelta {} 131 | 132 | }*/ 133 | 134 | mod rating_i64 { 135 | use crate::{TimeDelta, TimeSpan}; 136 | use std::cmp::min; 137 | 138 | // these objects determine the precision/length of the rating (i32/i64) - lower 139 | // values take less space and time, higher values have higher precision 140 | pub type Rating = i64; 141 | pub type RatingDelta = i64; 142 | pub type RatingDeltaDelta = i64; 143 | 144 | const RATING_PRECISION: i64 = 1 << 32; 145 | 146 | pub trait RatingExt { 147 | #[inline] 148 | fn add_mul(r: Rating, rd: RatingDelta, td: TimeDelta) -> Rating { 149 | r + rd * td.as_i64() 150 | } 151 | 152 | #[inline] 153 | fn add_mul_usize(r: Rating, rd: RatingDelta, td: usize) -> Rating { 154 | r + rd * td as i64 155 | } 156 | 157 | #[inline] 158 | fn from_timespans(a: TimeSpan, b: TimeSpan, score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy) -> Rating { 159 | let overlap = TimeSpan::get_overlapping_length(a, b).as_f64(); 160 | let max_rating = 
score_fn(a.len(), b.len()); 161 | let length_normalization_factor = min(a.len(), b.len()).as_f64(); 162 | 163 | Rating::convert_from_f64(max_rating * overlap / length_normalization_factor) 164 | } 165 | 166 | #[inline] 167 | fn convert_from_f64(v: f64) -> Rating { 168 | (v * RATING_PRECISION as f64) as i64 169 | } 170 | 171 | #[inline] 172 | fn zero() -> Rating { 173 | 0 174 | } 175 | 176 | fn is_zero(self) -> bool; 177 | 178 | #[inline] 179 | fn div_by_delta_to_i64(r: Rating, other: RatingDelta) -> i64 { 180 | r / other 181 | } 182 | 183 | #[inline] 184 | fn div_by_i64_to_delta(r: Rating, other: i64) -> RatingDelta { 185 | r / other 186 | } 187 | 188 | fn as_readable_f32(self) -> f32; 189 | 190 | fn as_readable_f64(self) -> f64; 191 | } 192 | 193 | impl RatingExt for Rating { 194 | #[inline] 195 | fn is_zero(self) -> bool { 196 | self == 0 197 | } 198 | 199 | #[inline] 200 | fn as_readable_f32(self) -> f32 { 201 | self as f32 / RATING_PRECISION as f32 202 | } 203 | 204 | #[inline] 205 | fn as_readable_f64(self) -> f64 { 206 | self as f64 / RATING_PRECISION as f64 207 | } 208 | } 209 | 210 | pub trait RatingDeltaExt { 211 | #[inline] 212 | fn compute_rating_delta( 213 | a: TimeDelta, 214 | b: TimeDelta, 215 | score_fn: impl Fn(TimeDelta, TimeDelta) -> f64 + Copy, 216 | ) -> RatingDelta { 217 | let min: f64 = min(a, b).as_f64(); 218 | RatingDelta::convert_from_f64(score_fn(a, b) / min) 219 | //Rating::compute(a, b) / min(a, b).as_i64() 220 | } 221 | } 222 | impl RatingDeltaExt for RatingDelta {} 223 | } 224 | -------------------------------------------------------------------------------- /alass-core/src/time_types.rs: -------------------------------------------------------------------------------- 1 | // This file is part of the Rust library and binary `alass`. 
2 | // 3 | // Copyright (C) 2017 kaegi 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see . 17 | 18 | use std; 19 | use std::cmp::{max, min, Ordering}; 20 | use std::ops::*; 21 | 22 | /// Implements conversion to integer variables for TimeDelta and TimePoint. 23 | macro_rules! impl_from { 24 | ($f:ty, $t:ty) => { 25 | impl From<$f> for $t { 26 | fn from(t: $f) -> $t { 27 | t.0 as $t 28 | } 29 | } 30 | }; 31 | } 32 | 33 | /// This struct represents a time difference between two `TimePoints`. 34 | /// Internally its an integer type. 35 | #[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] 36 | pub struct TimeDelta(i64); 37 | 38 | impl TimeDelta { 39 | /// No difference in time. 40 | pub fn zero() -> TimeDelta { 41 | TimeDelta(Default::default()) 42 | } 43 | 44 | /// Smallest positive time difference the library can work with. 45 | pub fn one() -> TimeDelta { 46 | TimeDelta(1) 47 | } 48 | 49 | /// Create time delta as "TimeDelta::one() * v". 50 | pub fn from_i64(v: i64) -> TimeDelta { 51 | TimeDelta(v) 52 | } 53 | 54 | /// Return time difference as f64. 55 | pub fn as_f64(&self) -> f64 { 56 | self.0 as f64 57 | } 58 | 59 | /// Return time difference as f64. 60 | pub fn as_f32(&self) -> f32 { 61 | self.0 as f32 62 | } 63 | 64 | /// Return time difference as i64. 
65 | pub fn as_i64(&self) -> i64 { 66 | self.0 as i64 67 | } 68 | } 69 | 70 | impl_from!(TimeDelta, i32); 71 | impl_from!(TimeDelta, u32); 72 | impl_from!(TimeDelta, i64); 73 | impl_from!(TimeDelta, u64); 74 | 75 | impl std::iter::Sum for TimeDelta { 76 | fn sum>(iter: I) -> TimeDelta { 77 | TimeDelta(iter.map(|d| d.0).sum()) 78 | } 79 | } 80 | 81 | impl std::fmt::Display for TimePoint { 82 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 83 | write!(f, "{}", self.0) 84 | } 85 | } 86 | impl std::fmt::Display for TimeDelta { 87 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 88 | write!(f, "{}", self.0) 89 | } 90 | } 91 | 92 | impl Add for TimeDelta { 93 | type Output = TimeDelta; 94 | fn add(self, rhs: TimeDelta) -> TimeDelta { 95 | TimeDelta(self.0 + rhs.0) 96 | } 97 | } 98 | 99 | impl AddAssign for TimeDelta { 100 | fn add_assign(&mut self, rhs: TimeDelta) { 101 | self.0 += rhs.0; 102 | } 103 | } 104 | 105 | impl Sub for TimeDelta { 106 | type Output = TimeDelta; 107 | fn sub(self, rhs: TimeDelta) -> TimeDelta { 108 | TimeDelta(self.0 - rhs.0) 109 | } 110 | } 111 | 112 | impl SubAssign for TimeDelta { 113 | fn sub_assign(&mut self, rhs: TimeDelta) { 114 | self.0 -= rhs.0; 115 | } 116 | } 117 | 118 | impl Mul for TimeDelta { 119 | type Output = TimeDelta; 120 | fn mul(self, rhs: i64) -> TimeDelta { 121 | TimeDelta(self.0 * rhs) 122 | } 123 | } 124 | 125 | impl MulAssign for TimeDelta { 126 | fn mul_assign(&mut self, rhs: i64) { 127 | self.0 *= rhs; 128 | } 129 | } 130 | 131 | impl Mul for i64 { 132 | type Output = TimeDelta; 133 | fn mul(self, rhs: TimeDelta) -> TimeDelta { 134 | TimeDelta(self * rhs.0) 135 | } 136 | } 137 | 138 | impl Neg for TimeDelta { 139 | type Output = TimeDelta; 140 | fn neg(self) -> TimeDelta { 141 | TimeDelta(-self.0) 142 | } 143 | } 144 | 145 | // ////////////////////////////////////////////////////////////////////////////////////////////////// 146 | // struct TimeSpan 147 | 148 | /// Represents a 
timepoint in your own metric. 149 | /// 150 | /// A timepoint is internally represented by an integer (because the align 151 | /// algorithm needs discrete 152 | /// time steps). You will have to choose your own metric: for example 1i64 means 153 | /// 2ms. The internal algorithm does not use any non-user given `TimePoint`s 154 | /// (so its interpretation is 155 | /// up to you). 156 | /// 157 | /// This is the reason this library works with `TimePoint` and `TimeDelta`: to 158 | /// enforce 159 | /// an absolute and delta relationship an a own metric. 160 | /// 161 | /// The only way to create a new `TimePoint` is with `TimePoint::from({i64})`. 162 | /// 163 | /// ``` 164 | /// use alass_core::TimePoint; 165 | /// 166 | /// let p = TimePoint::from(10); 167 | /// 168 | /// // to get that i64 again 169 | /// let i1: i64 = p.into(); 170 | /// let i2 = i64::from(p); 171 | /// ``` 172 | /// 173 | #[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] 174 | pub struct TimePoint(i64); 175 | 176 | impl TimePoint { 177 | /// Returns a f32 for the given time point. 178 | pub fn as_f32(self) -> f32 { 179 | self.0 as f32 180 | } 181 | 182 | /// Returns a f32 for the given time point. 183 | pub fn as_f64(self) -> f64 { 184 | self.0 as f64 185 | } 186 | 187 | /// Returns a i64 for the given time point. 
188 | pub fn as_i64(self) -> i64 { 189 | self.0 as i64 190 | } 191 | } 192 | 193 | impl From for TimePoint { 194 | fn from(f: i64) -> TimePoint { 195 | TimePoint(f) 196 | } 197 | } 198 | impl_from!(TimePoint, i64); 199 | 200 | impl Sub for TimePoint { 201 | type Output = TimeDelta; 202 | fn sub(self, rhs: TimePoint) -> TimeDelta { 203 | TimeDelta(self.0 - rhs.0) 204 | } 205 | } 206 | 207 | impl Add for TimePoint { 208 | type Output = TimePoint; 209 | fn add(self, rhs: TimeDelta) -> TimePoint { 210 | TimePoint(self.0 + rhs.0) 211 | } 212 | } 213 | 214 | impl AddAssign for TimePoint { 215 | fn add_assign(&mut self, rhs: TimeDelta) { 216 | self.0 += rhs.0; 217 | } 218 | } 219 | 220 | impl Sub for TimePoint { 221 | type Output = TimePoint; 222 | fn sub(self, rhs: TimeDelta) -> TimePoint { 223 | TimePoint(self.0 - rhs.0) 224 | } 225 | } 226 | 227 | impl SubAssign for TimePoint { 228 | fn sub_assign(&mut self, rhs: TimeDelta) { 229 | self.0 -= rhs.0; 230 | } 231 | } 232 | 233 | // ////////////////////////////////////////////////////////////////////////////////////////////////// 234 | // struct TimeSpan 235 | 236 | /// Represents a time span from "start" (included) to "end" (excluded). 237 | /// 238 | /// The constructors will ensure "start <= end", this condition will hold at 239 | /// any given time. 240 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] 241 | pub struct TimeSpan { 242 | /// The first time point of the time span (inclusive) 243 | pub start: TimePoint, 244 | 245 | /// The last time point of the time span (excluded) 246 | pub end: TimePoint, 247 | } 248 | 249 | impl TimeSpan { 250 | /// Create a new TimeSpan with `start` and `end`. 
251 | /// 252 | /// # Examples 253 | /// ```rust 254 | /// use alass_core::{TimeSpan, TimePoint}; 255 | /// 256 | /// let t0 = TimePoint::from(0); 257 | /// let t10 = TimePoint::from(10); 258 | /// 259 | /// let ts = TimeSpan::new(t0, t10); 260 | /// ``` 261 | /// 262 | /// # Panics 263 | /// 264 | /// 265 | /// This function asserts that `start` is less or equal `end`. 266 | /// 267 | /// ```rust,should_panic 268 | /// use alass_core::{TimeSpan, TimePoint}; 269 | /// 270 | /// let t0 = TimePoint::from(0); 271 | /// let t10 = TimePoint::from(10); 272 | /// 273 | /// // this will case a panic 274 | /// let ts = TimeSpan::new(t10, t0); 275 | /// ``` 276 | #[inline] 277 | pub fn new(start: TimePoint, end: TimePoint) -> TimeSpan { 278 | assert!(start <= end); 279 | TimeSpan { start: start, end: end } 280 | } 281 | 282 | /// Create a new TimeSpan with `start` and `end`. This function will not 283 | /// panic on `end < start`, but 284 | /// swap the values before calling `TimeSpan::new()`. 285 | /// 286 | /// # Examples 287 | /// ```rust 288 | /// use alass_core::{TimeSpan, TimePoint}; 289 | /// 290 | /// let t0 = TimePoint::from(0); 291 | /// let t10 = TimePoint::from(10); 292 | /// 293 | /// let ts = TimeSpan::new_safe(t10, t0); 294 | /// assert!(ts.start() == t0 && ts.end() == t10); 295 | /// ``` 296 | pub fn new_safe(start: TimePoint, end: TimePoint) -> TimeSpan { 297 | if end < start { 298 | TimeSpan::new(end, start) 299 | } else { 300 | TimeSpan::new(start, end) 301 | } 302 | } 303 | 304 | /// Mutates a `TimeSpan`s end. 305 | /// 306 | /// # Panics 307 | /// 308 | /// Will panic if `new_end` is less than current `start`. 309 | pub fn new_copy_with_end(self, new_end: TimePoint) -> TimeSpan { 310 | TimeSpan::new(self.start, new_end) 311 | } 312 | 313 | /// Returns the length of the `TimeSpan`. 314 | /// 315 | /// `len()` is zero, if and only if `start` is `end`. 
    pub fn len(self) -> TimeDelta {
        self.end - self.start
    }

    /// Returns true if `start == end`.
    pub fn is_empty(self) -> bool {
        self.end == self.start
    }

    /// Returns the start point of the `TimeSpan`.
    #[inline(always)]
    pub fn start(self) -> TimePoint {
        self.start
    }

    /// Returns the end point of the `TimeSpan`.
    #[inline(always)]
    pub fn end(self) -> TimePoint {
        self.end
    }

    /// Returns one (of the possibly two) points in the center of the `TimeSpan`.
    ///
    /// The integer division truncates, so for spans of odd length this is one
    /// of the two middle points.
    // NOTE(review): `start + end` can overflow i64 for extreme time points —
    // presumably fine for realistic subtitle timestamps; confirm.
    pub fn half(self) -> TimePoint {
        TimePoint::from((self.start.as_i64() + self.end.as_i64()) / 2)
    }

    /// Returns true if `self` contains `TimeSpan` `other`.
    ///
    /// # Examples
    /// ```
    /// use alass_core::{TimeSpan, TimePoint};
    ///
    /// let outer = TimeSpan::new(TimePoint::from(0), TimePoint::from(10));
    /// let inner = TimeSpan::new(TimePoint::from(2), TimePoint::from(8));
    ///
    /// assert!(outer.contains(inner));
    /// assert!(!inner.contains(outer));
    /// ```
    pub fn contains(self, other: TimeSpan) -> bool {
        other.start >= self.start && other.end <= self.end
    }

    /// Returns the smallest difference between two `TimeSpan`s.
    ///
    /// Overlapping or touching spans have a distance of zero.
    ///
    /// ```
    /// use alass_core::{TimeSpan, TimePoint, TimeDelta};
    ///
    /// let p = TimePoint::from(0);
    /// let d = TimeDelta::one();
    ///
    /// let ts1 = TimeSpan::new(p, p + 10 * d);
    /// let ts4 = TimeSpan::new(p + 20 * d, p + 100 * d);
    ///
    /// assert!(TimeSpan::fast_distance_to(ts1, ts1) == 0 * d);
    /// assert!(TimeSpan::fast_distance_to(ts1, ts4) == 10 * d);
    /// assert!(TimeSpan::fast_distance_to(ts4, ts1) == 10 * d);
    /// assert!(TimeSpan::fast_distance_to(ts4, ts4) == 0 * d);
    /// ```
    pub fn fast_distance_to(self, other: TimeSpan) -> TimeDelta {
        // self < other
        if self.end < other.start {
            other.start - self.end
        }
        // self > other
        else if self.start > other.end {
            self.start - other.end
        }
        // self and other overlap
        else {
            TimeDelta::zero()
        }
    }

    /// Returns the length of the overlap of two `TimeSpan`s, or zero if they
    /// do not overlap.
    // (previous doc line "Returns the smallest difference between two
    // `TimeSpan`s" was copy-pasted from `fast_distance_to` above)
    pub fn get_overlapping_length(self, other: TimeSpan) -> TimeDelta {
        let start_max = max(self.start, other.start);
        let end_min = min(self.end, other.end);
        max(TimeDelta::zero(), end_min - start_max)
    }

    /// Scales `start` and `end` (relative to the zero time point) by
    /// `scaling_factor`; results are truncated to integer time points.
    pub fn scaled(self, scaling_factor: f64) -> TimeSpan {
        let new_start = TimePoint::from((self.start.as_f64() * scaling_factor) as i64);
        let new_end = TimePoint::from((self.end.as_f64() * scaling_factor) as i64);
        TimeSpan::new(new_start, new_end)
    }

    /// Compares two `TimeSpan`s by their start timepoint.
    pub fn cmp_start(self, other: TimeSpan) -> Ordering {
        self.start.cmp(&other.start)
    }

    /// Compares two `TimeSpan`s by their end timepoint.
403 | pub fn cmp_end(self, other: TimeSpan) -> Ordering { 404 | self.end.cmp(&other.end) 405 | } 406 | } 407 | 408 | impl Add for TimeSpan { 409 | type Output = TimeSpan; 410 | fn add(self, rhs: TimeDelta) -> TimeSpan { 411 | TimeSpan::new(self.start + rhs, self.end + rhs) 412 | } 413 | } 414 | -------------------------------------------------------------------------------- /alass-core/src/timespan_ops.rs: -------------------------------------------------------------------------------- 1 | // This file is part of the Rust library and binary `alass`. 2 | // 3 | // Copyright (C) 2017 kaegi 4 | // 5 | // This program is free software: you can redistribute it and/or modify 6 | // it under the terms of the GNU General Public License as published by 7 | // the Free Software Foundation, either version 3 of the License, or 8 | // (at your option) any later version. 9 | // 10 | // This program is distributed in the hope that it will be useful, 11 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | // GNU General Public License for more details. 14 | // 15 | // You should have received a copy of the GNU General Public License 16 | // along with this program. If not, see . 
17 | 18 | use crate::{TimeDelta, TimeSpan}; 19 | use std; 20 | use std::cmp::max; 21 | 22 | fn prepare_spans_sorted(overlapping: &[TimeSpan]) -> (Vec, Vec) { 23 | if overlapping.is_empty() { 24 | return (Vec::new(), Vec::new()); 25 | } 26 | 27 | // the constructor of TimeSpan ensures "start <= end" 28 | 29 | // sort the spans by starting time but save the permuation through enumeration 30 | let mut sorted_overlapping: Vec<(usize, TimeSpan)> = overlapping.iter().cloned().enumerate().collect(); 31 | sorted_overlapping.sort_by(|a, b| TimeSpan::cmp_start(a.1, b.1)); 32 | 33 | // create a mapping from "original vector index -> sorted vector index" 34 | let mut mapping = std::vec::from_elem(0usize, overlapping.len()); 35 | for (i2, &(i, _)) in sorted_overlapping.iter().enumerate() { 36 | mapping[i] = i2; 37 | } 38 | 39 | (sorted_overlapping.into_iter().map(|(_, ts)| ts).collect(), mapping) 40 | } 41 | 42 | /// Returns a smaller list of non-overlapping time spans and a vector with 43 | /// original length which contains a mapping from "index in overlapping vector 44 | /// -> index in non-overlapping-vector" 45 | /// Requires that all spans are sorted by start time and the vector is not 46 | /// empty. 
47 | fn prepare_spans_non_overlapping(v: &[TimeSpan]) -> (Vec, Vec) { 48 | if v.is_empty() { 49 | return (Vec::new(), Vec::new()); 50 | } 51 | 52 | // condense overlapping time spans and create a mapping "sorted vector index -> 53 | // non-overlapping vector index" 54 | let mut result: Vec = Vec::with_capacity(v.len()); 55 | let mut mapping: Vec = Vec::with_capacity(v.len()); 56 | let mut current_end = v[0].start(); // this does not overlap with first time span 57 | for ts in v { 58 | if ts.start() < current_end { 59 | // timespans overlap -> only extend current timespan (if anything at all) 60 | let last_element_index = result.len() - 1; 61 | current_end = max(current_end, ts.end()); 62 | result[last_element_index] = result[last_element_index].new_copy_with_end(current_end); 63 | } else { 64 | // time span does not overlap 65 | result.push(*ts); 66 | current_end = ts.end(); 67 | } 68 | 69 | // the currennt time span is now inside the last new timespan 70 | mapping.push(result.len() - 1); 71 | } 72 | 73 | (result, mapping) 74 | } 75 | 76 | /// `v` should only contain non-overlapping sorted timespans, sorted by 77 | /// starting time. 78 | /// Returns a list of time-spans without spans of zero-length. The zero-length 79 | /// time spans 80 | /// are grouped together with next or previous time spans. 
81 | fn prepare_spans_nonzero(v: &[TimeSpan]) -> (Vec, Vec) { 82 | // list of non-zero spans 83 | let non_zero_spans: Vec = v.iter().cloned().filter(|&ts| ts.len() > TimeDelta::zero()).collect(); 84 | if non_zero_spans.is_empty() { 85 | return (Vec::new(), Vec::new()); 86 | } 87 | 88 | let mut new_index = 0; 89 | let mut indices = Vec::with_capacity(v.len()); 90 | for ts in v { 91 | if ts.len() != TimeDelta::zero() { 92 | // this timespan is in the non_zero_spans vector -> go to the right index 93 | indices.push(new_index); 94 | new_index += 1; 95 | continue; 96 | } 97 | 98 | let prev_nonzero_ts_opt = if new_index == 0 { 99 | None 100 | } else { 101 | Some(non_zero_spans[new_index - 1]) 102 | }; 103 | let next_nonzero_ts_opt = if new_index == non_zero_spans.len() { 104 | None 105 | } else { 106 | Some(non_zero_spans[new_index]) 107 | }; 108 | 109 | let merge_with_prev = match (prev_nonzero_ts_opt, next_nonzero_ts_opt) { 110 | (None, None) => panic!("No previous or next span in non-empty non_zero_span vector"), 111 | (Some(_), None) => true, 112 | (None, Some(_)) => false, 113 | (Some(p), Some(n)) => ts.fast_distance_to(p) <= ts.fast_distance_to(n), 114 | }; 115 | 116 | indices.push(if merge_with_prev { new_index - 1 } else { new_index }); 117 | } 118 | 119 | (non_zero_spans, indices) 120 | } 121 | 122 | pub fn prepare_time_spans(v: &[TimeSpan]) -> (Vec, Vec) { 123 | if v.is_empty() { 124 | return (Vec::new(), Vec::new()); 125 | } 126 | 127 | let operations = [ 128 | prepare_spans_sorted, 129 | prepare_spans_non_overlapping, 130 | prepare_spans_nonzero, 131 | ]; 132 | let mut mapping: Vec = (0..v.len()).collect(); 133 | let mut result: Vec = v.to_vec(); 134 | for &operation in &operations { 135 | let (new_result, new_mapping) = (operation)(&result); 136 | if new_result.is_empty() { 137 | return (Vec::new(), Vec::new()); 138 | } 139 | mapping = mapping.iter().map(|&i| new_mapping[i]).collect(); 140 | result = new_result; 141 | } 142 | 143 | (result, mapping) 144 | } 
145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | use crate::prepare_time_spans; 150 | use crate::tests::get_test_time_spans; 151 | 152 | #[test] 153 | fn test_prepare_time_spans() { 154 | for time_spans in get_test_time_spans() { 155 | let (non_overlapping, indices) = prepare_time_spans(&time_spans); 156 | 157 | assert!(non_overlapping.len() <= time_spans.len()); 158 | 159 | // function will condense non-zero timespans into one -> vector of zero-length 160 | // timespans will turn into empty vector 161 | let full_length: i64 = time_spans 162 | .iter() 163 | .cloned() 164 | .map(|time_spans| i64::from(time_spans.len())) 165 | .sum(); 166 | if full_length == 0 { 167 | assert!(non_overlapping.is_empty()); 168 | continue; 169 | } 170 | 171 | if time_spans.len() == 0 { 172 | continue; 173 | } 174 | assert!(non_overlapping.len() > 0); 175 | 176 | // test whether some spans overlap (they shouldn't) 177 | non_overlapping 178 | .iter() 179 | .cloned() 180 | .zip(non_overlapping.iter().cloned().skip(1)) 181 | .inspect(|&(last, current)| { 182 | assert!(last.start() <= last.end()); 183 | assert!(last.end() <= current.start()); 184 | assert!(current.start() <= current.end()); 185 | }) 186 | .count(); 187 | 188 | // test mapping from "overlapping -> non-overlapping" 189 | assert!(time_spans.len() == indices.len()); 190 | for (i, span) in time_spans.iter().cloned().enumerate() { 191 | assert!(non_overlapping[indices[i]].contains(span) || span.len() == TimeDelta::zero()); 192 | } 193 | 194 | // ----------------------------------------------------------- 195 | // apply `prepare_time_spans()` a second time which should now be a noop 196 | let (prepared_timespans2, indices2) = prepare_time_spans(&non_overlapping); 197 | assert_eq!(non_overlapping, prepared_timespans2); 198 | assert_eq!(indices2, (0..indices2.len()).collect::>()); 199 | } 200 | } 201 | } 202 | -------------------------------------------------------------------------------- 
/documentation/slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaegi/alass/874f02d9577182752a0f969b6d6b98fd65bdf1fc/documentation/slides.pdf -------------------------------------------------------------------------------- /documentation/thesis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaegi/alass/874f02d9577182752a0f969b6d6b98fd65bdf1fc/documentation/thesis.pdf -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | reorder_imports = true 2 | max_width = 120 3 | -------------------------------------------------------------------------------- /statistics-helpers/export_subtitle_from_database.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.7 2 | 3 | import sys 4 | import json 5 | import os 6 | import argparse 7 | import subprocess 8 | 9 | def format_srt_time(ms): 10 | subsec_ms = ms % 1000 11 | sec = (ms // 1000) % 60 12 | minutes = (ms // (1000 * 60)) % 60 13 | hours = (ms // (1000 * 60 * 60)) 14 | return '{0:0>2}:{1:0>2}:{2:0>2},{3:0>3}'.format(hours, minutes, sec, subsec_ms) 15 | 16 | def write_subtitle_data(subtitle, path): 17 | subtitle_data = subtitle['data'] 18 | subtitle_data.sort(key = lambda line: line['start_ms']) 19 | 20 | os.makedirs(os.path.dirname(path), exist_ok=True) 21 | with open(path, "w") as srt_file: 22 | for i, line in enumerate(subtitle_data): 23 | time_header = '%s --> %s\n' % (format_srt_time(line['start_ms']), format_srt_time(line['end_ms'])) 24 | srt_file.writelines(['%s\n' % (i + 1), time_header, line['text'], '\n\n']) 25 | 26 | #print('Wrote file `%s`!' 
% path) 27 | 28 | def find_movie_and_sub(data, subtitle_id): 29 | for movie in data['movies']: 30 | for subtitle in movie['subtitles']: 31 | if subtitle['id'] == subtitle_id: 32 | return (movie, subtitle) 33 | 34 | return (None, None) 35 | 36 | if len(sys.argv) < 4: 37 | print('Usage: program path/to/database.json path/to/output/dir subtitle_id1 subtitle_id2 subtitle_id3 ...', file=sys.stderr) 38 | sys.exit(1) 39 | 40 | parser = argparse.ArgumentParser(description='Export subtitle from database') 41 | parser.add_argument('--database-dir', required=True, help='directory for database files (program input)') 42 | parser.add_argument('--output-dir', required=True, help='directory for generated srt files (program output)') 43 | parser.add_argument('--sub-ids', required=True, help='IDs of requested subtitles (comma separated)') 44 | parser.add_argument('--open-mpv', action='store_true') 45 | 46 | args = parser.parse_args() 47 | 48 | database_path = os.path.join(args.database_dir, "database.json") 49 | output_dir = args.output_dir 50 | subtitle_ids = args.sub_ids.split(',') 51 | 52 | with open(database_path) as json_file: 53 | data = json.load(json_file) 54 | 55 | for subtitle_id in subtitle_ids: 56 | 57 | ref_movie, subtitle_data = find_movie_and_sub(data, subtitle_id) 58 | 59 | if subtitle_data == None: 60 | print('Subtitle with id `%s` not found in `%s`' % (subtitle_id, database_path)) 61 | sys.exit(1) 62 | 63 | ref_sub_data = ref_movie['reference_subtitle'] 64 | 65 | out_sub_path = os.path.join(output_dir, '%s.srt' % subtitle_id) 66 | out_ref_path = os.path.join(output_dir, '%s_ref.srt' % subtitle_id) 67 | 68 | write_subtitle_data(subtitle_data, out_sub_path) 69 | write_subtitle_data(ref_sub_data, out_ref_path) 70 | 71 | print("subtitle id: '%s' [%s lines]" % (subtitle_id, len(subtitle_data['data']))) 72 | print("reference subtitle id: '%s' [%s lines]" % (ref_sub_data['id'], len(ref_sub_data['data']))) 73 | print("reference movie id: '%s'" % ref_movie['id']) 74 | 
print("mpv '%s' --sub-file '%s' --sub-file '%s'" % (ref_movie['path'], out_sub_path, out_ref_path)) 75 | 76 | if args.open_mpv: 77 | subprocess.run(['mpv', ref_movie['path'], '--sub-file', out_sub_path, '--sub-file', out_ref_path]) -------------------------------------------------------------------------------- /statistics-helpers/generate_database_from_videolist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | from pythonopensubtitles.opensubtitles import OpenSubtitles 4 | import sys 5 | import json 6 | import zlib 7 | import base64 8 | import os 9 | import errno 10 | import shutil 11 | import pysubs2 12 | import re 13 | import argparse 14 | import time 15 | import datetime 16 | 17 | import pprint 18 | 19 | pp = pprint.PrettyPrinter(indent=4) 20 | 21 | parser = argparse.ArgumentParser(description="Process some integers.") 22 | parser.add_argument( 23 | "--videolist-file", 24 | required=True, 25 | help="a file containing one path to a video file on each line (program input)", 26 | ) 27 | parser.add_argument( 28 | "--database-dir", 29 | required=True, 30 | help="directory for generated database files (program output), merges data if file already exists", 31 | ) 32 | parser.add_argument( 33 | "--clean-existing-database", 34 | action="store_true", 35 | help="do not merge new data into existing file; replace file instead", 36 | ) 37 | parser.add_argument( 38 | "--dry-run", action="store_true", help="do not modify database.json" 39 | ) 40 | 41 | args = parser.parse_args() 42 | 43 | videolist_file_path = args.videolist_file 44 | database_dir = args.database_dir 45 | clean_existing_database = args.clean_existing_database 46 | dry_run = args.dry_run 47 | database_path = os.path.join(database_dir, "database.json") 48 | 49 | print(videolist_file_path) 50 | 51 | with open(videolist_file_path) as f: 52 | video_paths = [line.strip() for line in f] 53 | video_paths = [x for x in video_paths if x] 54 | 55 | 56 | 
def make_parents(filename): 57 | if not os.path.exists(os.path.dirname(filename)): 58 | try: 59 | os.makedirs(os.path.dirname(filename)) 60 | except OSError as exc: # Guard against race condition 61 | if exc.errno != errno.EEXIST: 62 | raise 63 | 64 | 65 | def decompress(data, encoding): 66 | """ 67 | Convert a base64-compressed subtitles file back to a string. 68 | 69 | :param data: the compressed data 70 | :param encoding: the encoding of the original file (e.g. utf-8, latin1) 71 | """ 72 | try: 73 | return zlib.decompress(base64.b64decode(data), 16 + zlib.MAX_WBITS).decode( 74 | encoding 75 | ) 76 | except UnicodeDecodeError as e: 77 | print(e, file=sys.stderr) 78 | return 79 | 80 | 81 | def download_subtitles( 82 | ost, 83 | ids, 84 | encoding, 85 | override_filenames=None, 86 | output_directory=".", 87 | override_directories=None, 88 | extension="srt", 89 | return_decoded_data=False, 90 | ): 91 | override_filenames = override_filenames or {} 92 | override_directories = override_directories or {} 93 | successful = {} 94 | 95 | # OpenSubtitles will accept a maximum of 20 IDs for download 96 | if len(ids) > 20: 97 | print("Cannot download more than 20 files at once.", file=sys.stderr) 98 | ids = ids[:20] 99 | 100 | response = ost.xmlrpc.DownloadSubtitles(ost.token, ids) 101 | status = response.get("status").split()[0] 102 | encoded_data = response.get("data") if "200" == status else None 103 | 104 | if not encoded_data: 105 | return None 106 | 107 | for item in encoded_data: 108 | subfile_id = item["idsubtitlefile"] 109 | 110 | decoded_data = decompress(item["data"], encoding) 111 | 112 | if not decoded_data: 113 | print( 114 | "An error occurred while decoding subtitle " 115 | "file ID {}.".format(subfile_id), 116 | file=sys.stderr, 117 | ) 118 | elif return_decoded_data: 119 | successful[subfile_id] = decoded_data 120 | else: 121 | fname = override_filenames.get(subfile_id, subfile_id + "." 
+ extension) 122 | directory = override_directories.get(subfile_id, output_directory) 123 | fpath = os.path.join(directory, fname) 124 | make_parents(fpath) 125 | 126 | try: 127 | with open(fpath, "w", encoding="utf-8") as f: 128 | f.write(decoded_data) 129 | successful[subfile_id] = fpath 130 | except IOError as e: 131 | print( 132 | "There was an error writing file {}.".format(fpath), file=sys.stderr 133 | ) 134 | print(e, file=sys.stderr) 135 | 136 | return successful or None 137 | 138 | 139 | def query_yes_no(question, default="yes"): 140 | """Ask a yes/no question via raw_input() and return their answer. 141 | 142 | "question" is a string that is presented to the user. 143 | "default" is the presumed answer if the user just hits . 144 | It must be "yes" (the default), "no" or None (meaning 145 | an answer is required of the user). 146 | 147 | The "answer" return value is True for "yes" or False for "no". 148 | """ 149 | valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False} 150 | exit_cmd = ["q", "Q", "Quit", "quit", "exit"] 151 | if default is None: 152 | prompt = " [y/n/q] " 153 | elif default == "yes": 154 | prompt = " [Y/n/q] " 155 | elif default == "no": 156 | prompt = " [y/N/q] " 157 | else: 158 | raise ValueError("invalid default answer: '%s'" % default) 159 | 160 | while True: 161 | choice = input(question + prompt).lower() 162 | if default is not None and choice == "": 163 | return valid[default] 164 | elif choice in exit_cmd: 165 | sys.exit(0) 166 | elif choice in valid: 167 | return valid[choice] 168 | else: 169 | sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n") 170 | 171 | 172 | def handle_subtitle(opensubtitles_metadata): 173 | # find subtitle ending (srt, ass, ...) 174 | # if subtitle['SubFormat'] not in sub_format_to_ending: 175 | # sub_info_json = json.dumps(subtitle, indent=4) 176 | # print(sub_info_json) 177 | # print('Unreckognized subtitle format \'%s\'! Skipping this subtitle!' 
% subtitle['SubFormat']) 178 | # continue 179 | # sub_ending = sub_format_to_ending[sub_format_to_ending[subtitle['SubFormat']]] 180 | 181 | # sub_filename = '{}-{:0>04}.{}'.format(movie_name_normalized, subtitle_idx, sub_ending) 182 | # sub_data_filename = '{}-{:0>04}.{}'.format(movie_name_normalized, subtitle_idx, 'json') 183 | sub_id = opensubtitles_metadata["IDSubtitleFile"] 184 | print("Downloading subtitle with id `%s`..." % sub_id, file=sys.stderr, end=" ") 185 | data = None 186 | try: 187 | time.sleep(0.4) 188 | data = download_subtitles( 189 | ost, 190 | [sub_id], 191 | opensubtitles_metadata["SubEncoding"], 192 | return_decoded_data=True, 193 | ) 194 | except KeyboardInterrupt: 195 | raise 196 | except: 197 | print("error occured") 198 | 199 | if data == None: 200 | print("Error getting data - skipping subtitle!", file=sys.stderr) 201 | return None 202 | 203 | print("Done!", file=sys.stderr) 204 | 205 | ssa_styling_pattern = re.compile(r"\s*#?{[^}]*}#?\s*") # remove SSA-styling info 206 | newline_whitespace = re.compile( 207 | r"\s*\n\s*" 208 | ) # remove unnecessary trailing space around newlines 209 | 210 | line_data = [] 211 | 212 | decoded_sub_data = pysubs2.SSAFile.from_string( 213 | data[sub_id], encoding=opensubtitles_metadata["SubEncoding"] 214 | ) 215 | for line in decoded_sub_data: 216 | if "www.opensubtitles.org" in line.text.lower(): 217 | continue # remove ad as this throws of pairing/statistics (same text in different places) 218 | 219 | text = line.text.replace("\n", "").replace("\r", "") 220 | text = ssa_styling_pattern.sub("", text) 221 | text = re.sub(r"[\x00-\x1f\x7f-\x9f]", "", text) 222 | text = text.replace(r"\N", "\n") 223 | text = text.strip() 224 | text = newline_whitespace.sub("\n", text) 225 | 226 | if line.start < line.end: 227 | line_data.append({"start_ms": line.start, "end_ms": line.end, "text": text}) 228 | elif line.start > line.end: 229 | line_data.append({"start_ms": line.end, "end_ms": line.start, "text": text}) 230 | 
else: 231 | # start == end 232 | pass 233 | 234 | line_data = sorted(line_data, key=lambda l: l["start_ms"]) 235 | 236 | return { 237 | "id": opensubtitles_metadata["IDSubtitleFile"], 238 | "opensubtitles_metadata": opensubtitles_metadata, 239 | "data": line_data, 240 | } 241 | 242 | 243 | def handle_subtitle_files( 244 | movie_id, reference_subtitle_metadata, opensubtitle_metadatas 245 | ): 246 | 247 | reference_subtitle_entry = handle_subtitle(reference_subtitle_metadata) 248 | if reference_subtitle_entry == None: 249 | print("failed to download reference subtitle...", file=sys.stderr) 250 | return None 251 | 252 | token = ost.login("", "") 253 | print("New OpenSubtitles token: %s" % token, file=sys.stderr) 254 | 255 | result_subtitles_list = [] 256 | 257 | for opensubtitle_metadata in opensubtitle_metadatas: 258 | if ( 259 | opensubtitle_metadata["IDSubtitle"] 260 | == reference_subtitle_metadata["IDSubtitle"] 261 | ): 262 | print("skipping reference subtitle...", file=sys.stderr) 263 | continue 264 | 265 | subtitle_entry = handle_subtitle(opensubtitle_metadata) 266 | if subtitle_entry == None: 267 | continue 268 | result_subtitles_list.append(subtitle_entry) 269 | 270 | return (reference_subtitle_entry, result_subtitles_list) 271 | 272 | 273 | def ask_user_for_movie(movie_name, correct_subtitle_metadata, subtitle_files): 274 | 275 | movie_name_normalized = movie_name.lower().replace(" ", "-") 276 | 277 | data = ost.search_movies_on_imdb(movie_name) 278 | for film in data["data"]: 279 | if "from_redis" in film and film["from_redis"] == "false": 280 | continue 281 | print("%s [IMDB-ID: %s]" % (film["title"], film["id"]), file=sys.stderr) 282 | answer = query_yes_no("Download subtitles for this movie?") 283 | print(file=sys.stderr) 284 | if answer is True: 285 | imdb_id = film["id"] 286 | subtitle_files = ost.search_subtitles( 287 | [{"imdbid": imdb_id, "sublanguageid": "eng"}] 288 | ) 289 | handle_subtitle_files( 290 | movie_name_normalized, 
correct_subtitle_metadata, subtitle_files 291 | ) 292 | 293 | sys.exit(0) 294 | 295 | 296 | def to_normalized_name(s): 297 | printable = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") 298 | return "".join(filter(lambda x: x in printable, s.lower().replace(" ", "-"))) 299 | 300 | 301 | ost = OpenSubtitles() 302 | from pythonopensubtitles.utils import File as OstFile 303 | 304 | token = ost.login("", "") 305 | print("OpenSubtitles token: %s" % token, file=sys.stderr) 306 | 307 | 308 | if clean_existing_database: 309 | movies = {} 310 | else: 311 | try: 312 | with open(database_path) as f: 313 | movies = {movie["id"]: movie for movie in json.load(f)["movies"]} 314 | except IOError: 315 | movies = {} 316 | 317 | movies_without_reference_sub_count = 0 318 | movies_with_reference_sub_count = 0 319 | 320 | all_subtitles = {} 321 | all_ref_subtitles = {} 322 | 323 | for file_idx, file_path in enumerate(video_paths): 324 | f = OstFile(file_path) 325 | file_hash = f.get_hash() 326 | file_basename = os.path.basename(file_path) 327 | 328 | print(file=sys.stderr) 329 | print("-------------------------------------------------------", file=sys.stderr) 330 | print( 331 | "[%s/%s] Movie `%s` with hash `%s`:" 332 | % (file_idx, len(video_paths), file_basename, file_hash), 333 | file=sys.stderr, 334 | ) 335 | 336 | time.sleep(0.2) 337 | subtitle_files = ost.search_subtitles( 338 | [{"moviehash": file_hash, "sublanguageid": "eng", "moviebytesize": f.size}] 339 | ) 340 | if len(subtitle_files) == 0: 341 | print("NOT REGISTERED on OpenSubtitles", file=sys.stderr) 342 | movies_without_reference_sub_count = movies_without_reference_sub_count + 1 343 | 344 | continue 345 | 346 | # pp.pprint([(f['MovieName'],f['Score'],f['IDMovie']) for f in subtitle_files]) 347 | 348 | movie_ids = [f["IDMovie"] for f in subtitle_files] 349 | most_probable_movie = max(set(movie_ids), key=movie_ids.count) 350 | # if movie_ids.count(most_probable_movie) < 2: 351 | # print('UNSURE', 
file=sys.stderr) 352 | # continue 353 | 354 | correct_subtitle_file = next( 355 | x for x in subtitle_files if x["IDMovie"] == most_probable_movie 356 | ) 357 | 358 | # if correct_subtitle_file['MovieKind'] != 'movie': 359 | # print('NOT MOVIE') 360 | # continue 361 | 362 | movies_with_reference_sub_count = movies_with_reference_sub_count + 1 363 | movie_name = correct_subtitle_file["MovieName"] 364 | movie_name_normalized = to_normalized_name(movie_name) 365 | movie_id = "%s#%s" % (movie_name_normalized, file_hash) 366 | print("moviename is `%s`" % movie_name, file=sys.stderr) 367 | 368 | time.sleep(0.2) 369 | subtitles_metadata = ost.search_subtitles( 370 | [{"idmovie": correct_subtitle_file["IDMovie"], "sublanguageid": "eng"}] 371 | ) 372 | 373 | try: 374 | movie_database_entry = movies[movie_id] 375 | known_subtitles = set( 376 | [ 377 | subtitles_metadata["id"] 378 | for subtitles_metadata in movie_database_entry["subtitles"] 379 | ] 380 | ) 381 | except KeyError: 382 | movie_database_entry = { 383 | "id": movie_id, 384 | "name": movie_name, 385 | "path": file_path, 386 | "reference_subtitle": None, 387 | "subtitles": [], 388 | } 389 | known_subtitles = set() 390 | 391 | all_ref_subtitles[correct_subtitle_file["IDSubtitleFile"]] = { 392 | "id": correct_subtitle_file["IDSubtitleFile"], 393 | "movie_id": movie_id, 394 | "metadata": correct_subtitle_file, 395 | } 396 | 397 | for subtitle_metadata in subtitles_metadata: 398 | if ( 399 | subtitle_metadata["IDSubtitleFile"] 400 | == correct_subtitle_file["IDSubtitleFile"] 401 | ): 402 | continue 403 | 404 | all_subtitles[subtitle_metadata["IDSubtitleFile"]] = { 405 | "id": subtitle_metadata["IDSubtitleFile"], 406 | "reference_id": correct_subtitle_file["IDSubtitleFile"], 407 | "movie_id": movie_id, 408 | "metadata": subtitle_metadata, 409 | } 410 | 411 | movies[movie_id] = movie_database_entry 412 | 413 | # movies_list.append( 414 | # reference_subtitle, subtitle_list = handle_subtitle_files(movie_id, 
correct_subtitle_file, subtitle_metadatas) 415 | # { 416 | # "id": movie_id, 417 | # "name": movie_name, 418 | # "path": file_path, 419 | # "reference_subtitle": reference_subtitle, 420 | # "subtitles": subtitle_list 421 | # } 422 | # ) 423 | 424 | 425 | def downloaded_subtitles_id(movies, movie_id): 426 | return [subtitle["id"] for subtitle in movies[movie_id]["subtitles"]] 427 | 428 | 429 | max_sub_count_for_movie = 2 430 | 431 | subtitles_to_download_ids_for_movie_id = {} 432 | for sub_id, sub_data in all_subtitles.items(): 433 | movie_id = sub_data["movie_id"] 434 | if sub_id in downloaded_subtitles_id(movies, movie_id): 435 | continue 436 | try: 437 | subtitles_set = subtitles_to_download_ids_for_movie_id[movie_id] 438 | except KeyError: 439 | subtitles_set = set() 440 | subtitles_to_download_ids_for_movie_id[movie_id] = subtitles_set 441 | 442 | if len(subtitles_set) < max_sub_count_for_movie: 443 | subtitles_set.add(sub_id) 444 | 445 | print("---->>") 446 | pp.pprint(subtitles_to_download_ids_for_movie_id) 447 | 448 | stop_download = False 449 | try: 450 | for movie_id, sub_ids in subtitles_to_download_ids_for_movie_id.items(): 451 | if stop_download: 452 | break 453 | 454 | movie_database_entry = movies[movie_id] 455 | for sub_id in sub_ids: 456 | if stop_download: 457 | break 458 | 459 | download_subtitle_data = all_subtitles[sub_id] 460 | if movie_database_entry["reference_subtitle"] == None: 461 | print("%s -> ref %s" % (movie_id, sub_id)) 462 | ref_sub_id = download_subtitle_data["reference_id"] 463 | try: 464 | sd = handle_subtitle(all_ref_subtitles[ref_sub_id]["metadata"]) 465 | if sd == None: 466 | break 467 | movie_database_entry["reference_subtitle"] = sd 468 | except KeyboardInterrupt: 469 | print("Interrupted downloading ref sub.") 470 | stop_download = True 471 | break 472 | except Exception as e: 473 | pp.pprint(all_ref_subtitles[ref_sub_id]) 474 | print( 475 | "Error downloading subtitle '{}' for movie '{}'... 
Skipping this movie...".format( 476 | sub_id, movie_id 477 | ) 478 | ) 479 | break 480 | 481 | print("%s -> normal %s" % (movie_id, sub_id)) 482 | sd = handle_subtitle(all_subtitles[sub_id]["metadata"]) 483 | if sd == None: 484 | continue 485 | movie_database_entry["subtitles"].append(sd) 486 | except KeyboardInterrupt: 487 | print("Interrupted downloading.") 488 | pass 489 | except Exception as e: 490 | print("Unexpected error: %s" % e) 491 | pass 492 | 493 | 494 | movies_list = [ 495 | movie_data 496 | for movie_id, movie_data in movies.items() 497 | if movie_data["reference_subtitle"] != None 498 | ] 499 | 500 | # pp.pprint(movies_list) 501 | 502 | database_object = { 503 | "movies": movies_list, 504 | "movies_without_reference_sub_count": movies_without_reference_sub_count, 505 | "movies_with_reference_sub_count": movies_with_reference_sub_count, 506 | } 507 | 508 | 509 | if dry_run: 510 | print("Not writing database file because of '--dry-run'...", file=sys.stderr) 511 | print(file=sys.stderr) 512 | else: 513 | print(file=sys.stderr) 514 | print("Writing database file...", file=sys.stderr, end="") 515 | os.makedirs(database_dir, exist_ok=True) 516 | 517 | try: 518 | timestring = datetime.datetime.now().strftime("%Y-%m-%d--H%H-M%M-S%S") 519 | database_bk_path = os.path.join( 520 | database_dir, "database-bk-%s.json" % timestring 521 | ) 522 | os.rename(database_path, database_bk_path) 523 | except: 524 | pass 525 | 526 | with open(database_path, "w") as f: 527 | json.dump(database_object, f) 528 | 529 | print("Done!", file=sys.stderr) 530 | 531 | sys.exit(0) 532 | -------------------------------------------------------------------------------- /statistics-helpers/generate_plots_from_statistics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.7 2 | 3 | #show_plots = True 4 | show_plots = False 5 | 6 | plot_span_length_histogram_enabled = False 7 | plot_movie_hash_maches_enabled = False 8 | 
# ---------------------------------------------------------------------------
# Plot-selection flags.  Flags are first initialised to False and then
# selectively re-enabled below; to disable a single plot, comment out its
# "= True" line (as was done for plot_span_length_histogram_enabled).
plot_sync_state_distribution_enabled = False
plot_distance_to_reference_histogram_enabled = False
plot_offsets_by_split_penalty = False
plot_offsets_by_optimization = False
plot_offsets_by_min_span_length = False
plot_runtime_by_optimization = False
plot_all_configurations = False
plot_all_algorithms_time = False

#plot_span_length_histogram_enabled = True
plot_movie_hash_maches_enabled = True
plot_sync_state_distribution_enabled = True
plot_distance_to_reference_histogram_enabled = True
plot_offsets_by_split_penalty = True
plot_offsets_by_optimization = True
plot_offsets_by_min_span_length = True
plot_runtime_by_optimization = True
plot_all_configurations = True
plot_all_algorithms_time = True

# Shared axis label for all offset plots.
offset_text = 'Distance to reference in milliseconds'


class OffsetStatistics:
    """Summary statistics (min/max/total/percentiles) of an offset histogram.

    `histogram` is a dict with an "occurrences" mapping from offset (string or
    int) to the number of observations of that offset.  After construction,
    `self.percentiles[p]` holds the offset at the p-th percentile for
    p in 0..100.

    Raises ValueError for an empty (or all-zero) histogram, which previously
    died later with an opaque IndexError.
    """

    def __init__(self, histogram):
        occurrences = histogram["occurrences"]
        occurrences_sorted = sorted(
            [(int(offset), int(count)) for (offset, count) in occurrences.items()],
            key=lambda d: d[0],
        )
        total = sum([count for (offset, count) in occurrences_sorted])

        if not occurrences_sorted or total == 0:
            raise ValueError("cannot compute percentiles of an empty histogram")

        self.min = occurrences_sorted[0][0]
        self.max = occurrences_sorted[-1][0]
        self.total = total

        idx = 0
        # cumulative_count: number of observations in bins 0..idx (the
        # original name `last_bin_idx` obscured that it is a running count).
        current_offset, cumulative_count = occurrences_sorted[idx]

        self.percentiles = {}

        for percentile in range(0, 100):
            # Index, in the virtual sorted list of all observations, of the
            # observation marking this percentile.  Always < total, so the
            # scan below terminates before running off the bin list.
            perc_idx = int((percentile / 100) * total)

            # Advance through the bins until the cumulative count passes the
            # percentile index; that bin's offset is the percentile value.
            while True:
                if perc_idx < cumulative_count:
                    self.percentiles[percentile] = current_offset
                    break
                else:
                    idx = idx + 1
                    current_offset, count = occurrences_sorted[idx]
                    cumulative_count = cumulative_count + count

        self.percentiles[100] = self.max


def plot_conf(histogram):
    """Return the offsets of `histogram` at the global `plotted_percentiles`.

    NOTE(review): relies on the module-level `plotted_percentiles` list that
    is defined further down in this script; it is bound before this function
    is ever called.
    """
    offset_statistics = OffsetStatistics(histogram)
    return [offset_statistics.percentiles[percentile] for percentile in plotted_percentiles]


import matplotlib
import matplotlib.pyplot as plt
import math
import json
import argparse
import numpy as np
import csv
import os
import sys

# Rational values are stored as integers scaled by this factor in the
# statistics JSON files.
FIXED_POINT_NUMBER_FACTOR = 100000000


parser = argparse.ArgumentParser(description="Generate plots from alass statistics files.")
parser.add_argument(
    "--statistics-dir",
    required=True,
    help="directory for statistics files (program input)",
)
parser.add_argument(
    "--plots-dir",
    required=True,
    help="directory for generated plot files (program output)",
)
parser.add_argument(
    "--file-extension", default="png", help="File extension of the generated plots"
)

args = parser.parse_args()

statistics_folder_path = args.statistics_dir
output_dir = args.plots_dir
extension = args.file_extension

os.makedirs(output_dir, exist_ok=True)

###################################################
# Load the precomputed statistics (inputs for every plot below).

with open(os.path.join(statistics_folder_path, "statistics.json"), "r") as f:
    statistics = json.load(f)

with open(os.path.join(statistics_folder_path, "transient-statistics.json"), "r") as f:
    transient_statistics = json.load(f)


plt.rcParams.update({"figure.figsize": (8, 5), "figure.dpi": 400})
plt.rcParams['text.usetex'] = True
plt.rcParams.update({'font.size': 14})



# Percentiles (and matching bar colors) shown in the percentile bar plots,
# highest first so that smaller bars are drawn on top of larger ones.
plotted_percentiles = list(reversed([20, 50, 80, 90, 95, 99]))
plotted_percentiles_color = list(
    reversed(["black", "darkred", "red", "orange", "green", "darkgreen"])
)
perc_heights_array = [[] for _ in range(0, len(plotted_percentiles))]

if plot_runtime_by_optimization:

    fig, ax = plt.subplots()

    ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False) 138 | 139 | data = transient_statistics['time_required_by_optimization_value'] 140 | data = sorted(data, key=lambda d: float(d['key']) / FIXED_POINT_NUMBER_FACTOR) 141 | 142 | desc = [] 143 | boxplot_data = [] 144 | positions = [] 145 | for i, d in enumerate(data): 146 | opt_value = float(d['key']) / FIXED_POINT_NUMBER_FACTOR 147 | if opt_value >= 1: 148 | opt_value = int(opt_value) 149 | runtimes = [ms / 1000 for ms in d['val']] 150 | desc.append(opt_value) 151 | boxplot_data.append(runtimes) 152 | positions.append(i) 153 | 154 | plt.boxplot(boxplot_data, positions=positions) 155 | plt.xticks( 156 | positions, 157 | desc 158 | ) 159 | plt.xlabel("Approximation bound $E$") 160 | plt.ylabel("Time in seconds") 161 | plt.gca().set_ylim(bottom=0) 162 | plt.savefig(os.path.join(output_dir, "required-time-by-optimization." + extension), bbox_inches='tight') 163 | if show_plots: plt.show() 164 | 165 | if plot_all_algorithms_time: 166 | 167 | fig, ax = plt.subplots() 168 | 169 | ax.axvline(1, color="gray", linestyle="dotted", linewidth=0.8) 170 | ax.axvline(6, color="gray", linestyle="dotted", linewidth=0.8) 171 | 172 | ind = [-1,0, 2, 3, 4, 5, 7, 8, 9, 10] 173 | 174 | configurations = transient_statistics["time_required_by_algorithm"] 175 | configurations = sorted( 176 | configurations, 177 | key=lambda configuration: ( 178 | configuration["key"]["sync_ref_type"], 179 | {"None": 0, "Advanced": 1}[configuration["key"]["scaling_correct_mode"]], 180 | configuration["key"]["algorithm_variant"], 181 | ), 182 | ) 183 | 184 | data = [] 185 | desc = [] 186 | desc.append('Audio Extraction') 187 | desc.append('VAD') 188 | for configuration in configurations: 189 | data.append([x / 1000 for x in configuration['val']]) 190 | s = [] 191 | if configuration["key"]["algorithm_variant"] == "Split": 192 | s.append("Split") 193 | else: 194 | s.append("No-split") 195 | if configuration["key"]["scaling_correct_mode"] == "Advanced": 196 | s.append("FPS") 
197 | # s.append(configuration["key"]["sync_ref_type"]) 198 | desc.append(" + ".join(s)) 199 | 200 | 201 | # measured separately 202 | extracting_audio_time = 8 203 | extracting_audio_time_with_vad = 9 204 | 205 | 206 | ax.boxplot([[5.8,17.6,8.8,10.4,18.3,18.9]] + [[1.131, 1.394, 1.516, 1.698]] + data,positions=ind) 207 | 208 | ax.axvline(1, color="gray", linestyle="dotted", linewidth=0.8) 209 | ax.axvline(6, color="gray", linestyle="dotted", linewidth=0.8) 210 | ax.spines["top"].set_visible(False) 211 | ax.spines["right"].set_visible(False) 212 | 213 | ax.set_ylim(bottom=0) 214 | theight= 13.5 215 | plt.text(3.5, theight, "Aligning to subtitle", ha="center", wrap=True) 216 | plt.text(8.5, theight, "Aligning to audio", ha="center", wrap=True) 217 | 218 | plt.xticks(ind, desc) 219 | plt.xticks(rotation=60, ha="right") 220 | # fig.tight_layout() 221 | plt.subplots_adjust(bottom=0.25) 222 | #plt.title("Runtime comparison of algorithm variants") 223 | plt.ylabel("Time in seconds") 224 | #plt.xlabel("Algorithm Variant") 225 | plt.tight_layout() 226 | plt.savefig(os.path.join(output_dir, "algorithms-time-variant." 
+ extension), bbox_inches='tight') 227 | if show_plots: plt.show() 228 | plt.close() 229 | 230 | pass 231 | 232 | if plot_all_configurations: 233 | #plt.figure(num=None, figsize=(8, 3), dpi=200, facecolor="w", edgecolor="k") 234 | 235 | fig, ax = plt.subplots() 236 | 237 | ind = [] 238 | bar_data = [] 239 | desc = [] 240 | configurations = statistics["all_configurations_offset_histogram"] 241 | configurations = sorted( 242 | configurations, 243 | key=lambda configuration: ( 244 | configuration["key"]["sync_ref_type"], 245 | {"None": 0, "Advanced": 1}[configuration["key"]["scaling_correct_mode"]], 246 | configuration["key"]["algorithm_variant"], 247 | ), 248 | ) 249 | 250 | bar_data.append(plot_conf(statistics["raw_distance_histogram"])) 251 | desc.append("raw") 252 | 253 | for i, configuration in enumerate(configurations): 254 | bar_data.append(plot_conf(configuration["val"])) 255 | s = [] 256 | if configuration["key"]["algorithm_variant"] == "Split": 257 | s.append("Split") 258 | else: 259 | s.append("No-split") 260 | if configuration["key"]["scaling_correct_mode"] == "Advanced": 261 | s.append("FPS") 262 | # s.append(configuration["key"]["sync_ref_type"]) 263 | desc.append(" + ".join(s)) 264 | 265 | ind = [0, 2, 3, 4, 5, 7, 8, 9, 10] 266 | 267 | bar_data = np.array(bar_data) 268 | 269 | for i, (percentile, color) in enumerate( 270 | zip(plotted_percentiles, plotted_percentiles_color) 271 | ): 272 | label = "%sth percentile" % percentile 273 | plt.bar(ind, bar_data[:, i], width=0.8, label=label, color=color) 274 | 275 | ax = plt.gca() 276 | 277 | 278 | ax.axvline(1, color="gray", linestyle="dotted", linewidth=0.8) 279 | ax.axvline(6, color="gray", linestyle="dotted", linewidth=0.8) 280 | ax.set_ylim(bottom=0) 281 | ax.spines["top"].set_visible(False) 282 | ax.spines["right"].set_visible(False) 283 | 284 | props = dict(boxstyle='round', facecolor='white', alpha=0.8) 285 | 286 | ytop = 5000 287 | tpos = ytop - 500 288 | 289 | plt.text(3.8, tpos, "Aligning to 
subtitle", ha="center", wrap=True, bbox=props) 290 | plt.text(8.8, tpos, "Aligned to movie", ha="center", wrap=True, bbox=props) 291 | 292 | plt.xticks(ind, desc) 293 | plt.xticks(rotation=60, ha="right") 294 | plt.gca().set_ylim([0,ytop]) 295 | # fig.tight_layout() 296 | plt.subplots_adjust(bottom=0.25) 297 | plt.ylabel(offset_text) 298 | plt.legend(loc="upper right", bbox_to_anchor=(0, 0, 1, 0.8)) 299 | plt.tight_layout() 300 | plt.savefig(os.path.join(output_dir, "mode-comparison." + extension), bbox_inches='tight') 301 | if show_plots: plt.show() 302 | plt.close() 303 | 304 | 305 | def draw_histogram(ax, bins, json_histogram, color): 306 | val, weight = zip(*[(int(k) / 1000.0, int(v)) for k, v in json_histogram.items()]) 307 | height, bins = np.histogram(val, weights=weight, bins=bins) 308 | height = np.divide(height, np.max(height)) 309 | ax.step( 310 | bins[:-1], height, "k", linestyle="-", linewidth=1, where="post", color=color 311 | ) 312 | ax.bar( 313 | bins[:-1], 314 | height, 315 | width=np.diff(bins), 316 | linewidth=0, 317 | facecolor=color, 318 | alpha=0.3, 319 | align="edge", 320 | ) 321 | 322 | def plot_offset_percentiles_for_values( 323 | ax, statistics, database_values, title, xlabel, ylim, scale_factor=FIXED_POINT_NUMBER_FACTOR 324 | ): 325 | global perc_heights_array 326 | 327 | ax.spines["top"].set_visible(False) 328 | ax.spines["right"].set_visible(False) 329 | 330 | ax.set_ylim([0, ylim]) 331 | # ax.set_yscale("log") 332 | 333 | ind = [] 334 | split_penalties = [] 335 | 336 | data = [ 337 | (float(split_penalty_str), histogram) 338 | for (split_penalty_str, histogram) in database_values.items() 339 | ] 340 | data = sorted(data, key=lambda v: v[0]) 341 | 342 | bar_data = [] 343 | for i, (split_penalty_str, histogram) in enumerate(data): 344 | split_penalty = float(split_penalty_str) / scale_factor 345 | split_penalties.append(split_penalty) 346 | bar_data.append(plot_conf(histogram)) 347 | 348 | 
bar_data.append(plot_conf(statistics["raw_distance_histogram"])) 349 | 350 | ind = list(range(0, len(data))) + [len(data) + 1] 351 | 352 | bar_data = np.array(bar_data) 353 | for i, (percentile, color) in enumerate( 354 | zip(plotted_percentiles, plotted_percentiles_color) 355 | ): 356 | label = "%sth percentile" % percentile 357 | plt.bar(ind, bar_data[:, i], width=0.8, label=label, color=color) 358 | 359 | plt.xticks( 360 | ind, 361 | [ 362 | int(split_penalty) if split_penalty < 0.0000001 or split_penalty > 1 else split_penalty 363 | for split_penalty in split_penalties 364 | ] 365 | + ["raw"], 366 | ) 367 | plt.ylabel(offset_text) 368 | plt.xlabel(xlabel) 369 | plt.legend()#loc="upper right", bbox_to_anchor=(0, 0, 0.9, 1)) 370 | if title != None: plt.title(title) 371 | plt.xticks(rotation=45) 372 | 373 | if plot_offsets_by_min_span_length: 374 | #plt.figure(num=None, figsize=(8, 5), dpi=200, facecolor="w", edgecolor="k") 375 | 376 | ylim = 2000 377 | 378 | ax = plt.subplot(111) 379 | plot_offset_percentiles_for_values( 380 | ax, 381 | statistics, 382 | statistics["sync_offset_histogram_by_min_span_length"], 383 | None, 384 | "Minimum required span length in milliseconds", 385 | ylim, 386 | scale_factor=1. 387 | ) 388 | 389 | #plt.suptitle("", fontsize=20) 390 | 391 | plt.tight_layout() 392 | plt.savefig(os.path.join(output_dir, "min-span-length." 
+ extension), bbox_inches='tight') 393 | if show_plots: plt.show() 394 | plt.close() 395 | 396 | if plot_offsets_by_optimization: 397 | 398 | plt.figure(num=None, figsize=(8, 8), dpi=400, facecolor="w", edgecolor="k") 399 | 400 | ylim = 2000 401 | 402 | ax = plt.subplot(211) 403 | plot_offset_percentiles_for_values( 404 | ax, 405 | statistics, 406 | statistics["sync_offset_histogram_by_optimization"]["Video"], 407 | "Synchronizing to audio", 408 | "Optimal split approximation constant $E$", 409 | ylim 410 | ) 411 | 412 | ax = plt.subplot(212) 413 | plot_offset_percentiles_for_values( 414 | ax, 415 | statistics, 416 | statistics["sync_offset_histogram_by_optimization"]["Subtitle"], 417 | "Synchronizing to subtitles", 418 | "Optimal split approximation constant $E$", 419 | ylim 420 | ) 421 | 422 | plt.tight_layout(pad=0.7, h_pad=2, rect=(0, 0, 1, 0.9)) 423 | 424 | #plt.suptitle("Comparision of optimization", fontsize=20) 425 | 426 | plt.savefig(os.path.join(output_dir, "optimization-values." + extension), bbox_inches='tight') 427 | if show_plots: plt.show() 428 | plt.close() 429 | 430 | 431 | if plot_offsets_by_split_penalty: 432 | 433 | plt.figure(num=None, figsize=(8, 8), dpi=400, facecolor="w", edgecolor="k") 434 | 435 | 436 | ylim = 6000 437 | ax = plt.subplot(211) 438 | plot_offset_percentiles_for_values( 439 | ax, 440 | statistics, 441 | statistics["sync_offset_histogram_by_split_penalty"]["Video"], 442 | "Synchronizing to audio", 443 | "Split penalty $P$", 444 | ylim 445 | ) 446 | 447 | ax = plt.subplot(212) 448 | plot_offset_percentiles_for_values( 449 | ax, 450 | statistics, 451 | statistics["sync_offset_histogram_by_split_penalty"]["Subtitle"], 452 | "Synchronizing to subtitles", 453 | "Split penalty $P$", 454 | ylim 455 | ) 456 | 457 | plt.tight_layout(pad=0.7, h_pad=2, rect=(0, 0, 1, 0.9)) 458 | 459 | #plt.suptitle("Comparision of split penalties", fontsize=20) 460 | 461 | plt.savefig(os.path.join(output_dir, "split-penalties." 
+ extension), bbox_inches='tight') 462 | if show_plots: plt.show() 463 | plt.close() 464 | 465 | 466 | if plot_span_length_histogram_enabled: 467 | 468 | # Remove the plot frame lines. They are unnecessary chartjunk. 469 | ax = plt.subplot(111) 470 | ax.spines["top"].set_visible(False) 471 | ax.spines["right"].set_visible(False) 472 | 473 | # Ensure that the axis ticks only show up on the bottom and left of the plot. 474 | # Ticks on the right and top of the plot are generally unnecessary chartjunk. 475 | ax.get_xaxis().tick_bottom() 476 | ax.get_yaxis().tick_left() 477 | 478 | # Make sure your axis ticks are large enough to be easily read. 479 | # You don't want your viewers squinting to read your plot. 480 | plt.xticks(range(0, 11, 1), fontsize=14) 481 | plt.yticks(fontsize=14) 482 | 483 | binwidth = 0.05 484 | bins = np.arange(0, 10 + binwidth, binwidth) 485 | 486 | vad_color = (1, 0.5, 0.05) # '#ff7f0e' 487 | vad_color_light = tuple([c * 0.3 + 0.7 for c in vad_color]) 488 | subtitle_color = (0.12, 0.42, 0.74) # '#1f77b4' 489 | subtitle_color_light = tuple([c * 0.3 + 0.7 for c in subtitle_color]) 490 | 491 | draw_histogram( 492 | ax, bins, statistics["vad_span_length_histogram"]["occurrences"], vad_color 493 | ) 494 | draw_histogram( 495 | ax, 496 | bins, 497 | statistics["subtitle_span_length_histogram"]["occurrences"], 498 | subtitle_color, 499 | ) 500 | 501 | ax.set( 502 | title=None, ylabel="Normalized Frequency", xlabel="Length of spans in seconds" 503 | ) 504 | ax.set_xlim([0, 10]) 505 | ax.get_yaxis().set_ticks([]) 506 | ax.legend(["Spans from Voice-Activity-Detection", "Spans from Subtitles"]) 507 | # ax.set_ylim([0, 1]) 508 | 509 | plt.savefig(os.path.join(output_dir, "span-lengths-histogram." 
+ extension), bbox_inches='tight') 510 | # plt.show() 511 | plt.close() 512 | 513 | 514 | def make_beautiful_pie_chart(ax, labels, title, sizes, explode, pie_colors): 515 | 516 | pie_colors = [matplotlib.colors.to_rgb(c) for c in pie_colors] 517 | pie_colors_light = [(r, g, b, 0.3) for (r, g, b) in pie_colors] 518 | pie_colors_light2 = [(r, g, b, 1) for (r, g, b) in pie_colors] 519 | 520 | wedges, _, _ = ax.pie( 521 | sizes, 522 | explode=explode, 523 | labels=labels, 524 | autopct="%1.1f%%", 525 | pctdistance=0.85, 526 | startangle=90, 527 | colors=pie_colors_light, 528 | ) 529 | # Equal aspect ratio ensures that pie is drawn as a circle. 530 | ax.axis("equal") 531 | ax.set_title(title, pad=-200) 532 | 533 | centre_circle = plt.Circle((0, 0), 0.70, fc="white") 534 | centre_circle.set_linewidth(2) 535 | centre_circle.set_edgecolor("#444444") 536 | ax.add_artist(centre_circle) 537 | 538 | for w, edge_color in zip(wedges, pie_colors_light2): 539 | w.set_linewidth(2) 540 | w.set_edgecolor(edge_color) 541 | 542 | pass 543 | 544 | 545 | if plot_movie_hash_maches_enabled: 546 | 547 | labels = ["No match", "Match"] 548 | sizes = [ 549 | statistics["general"]["total_movie_count"] 550 | - statistics["general"]["movie_with_ref_sub_count"], 551 | statistics["general"]["movie_with_ref_sub_count"], 552 | ] 553 | explode = (0, 0) # only "explode" the 2nd slice (i.e. 'Hogs') 554 | 555 | pie_colors = ["red", "green"] 556 | 557 | fig, ax = plt.subplots() 558 | make_beautiful_pie_chart( 559 | ax, 560 | labels, 561 | "Movie file hash matches on 'OpenSubtitles.org'", 562 | sizes, 563 | explode, 564 | pie_colors, 565 | ) 566 | 567 | plt.savefig(os.path.join(output_dir, "movie-hash-matches." 
+ extension), bbox_inches='tight') 568 | # plt.show() 569 | plt.close() 570 | 571 | 572 | def plot_sync_class_distribution(ax, input_data, title): 573 | unknown_count = input_data["unknown"] 574 | synced_count = input_data["synced"] 575 | unsynced_count = input_data["unsynced"] 576 | 577 | labels = ["Good", "Bad"] 578 | explode = (0, 0) 579 | pie_colors = ["green", "red"] 580 | sizes = [synced_count, unsynced_count] 581 | 582 | make_beautiful_pie_chart(ax, labels, title, sizes, explode, pie_colors) 583 | 584 | 585 | if plot_sync_state_distribution_enabled: 586 | 587 | fig = plt.gcf() 588 | fig.tight_layout(pad=0, w_pad=-20, rect=(0,0,1,0.7)) 589 | #fig.suptitle("Percentage of synchronized subtitles", fontsize=16) 590 | plt.subplots_adjust(top=0.6) 591 | 592 | statistics["general"]["raw_sync_class_counts"]['synced'] += 3 593 | ax = plt.subplot(131) 594 | plot_sync_class_distribution( 595 | ax, statistics["general"]["raw_sync_class_counts"], title="Raw subtitle files" 596 | ) 597 | 598 | d = statistics["general"]["sync_to_video_sync_class_counts"] 599 | d['unsynced'] += 3 600 | ax = plt.subplot(132) 601 | plot_sync_class_distribution( 602 | ax, 603 | d, 604 | title="Aligning to audio", 605 | ) 606 | 607 | statistics["general"]["sync_to_sub_sync_class_counts"]['synced'] += 3 608 | ax = plt.subplot(133) 609 | plot_sync_class_distribution( 610 | ax, 611 | statistics["general"]["sync_to_sub_sync_class_counts"], 612 | title="Aligning to subtitle", 613 | ) 614 | 615 | plt.savefig(os.path.join(output_dir, "sync-state-distribution." + extension), bbox_inches='tight') 616 | # plt.show() 617 | plt.close() 618 | 619 | 620 | if plot_distance_to_reference_histogram_enabled: 621 | 622 | # Remove the plot frame lines. They are unnecessary chartjunk. 623 | ax = plt.subplot(111) 624 | ax.spines["top"].set_visible(False) 625 | ax.spines["right"].set_visible(False) 626 | 627 | # Ensure that the axis ticks only show up on the bottom and left of the plot. 
628 | # Ticks on the right and top of the plot are generally unnecessary chartjunk. 629 | ax.get_xaxis().tick_bottom() 630 | ax.get_yaxis().tick_left() 631 | 632 | # Make sure your axis ticks are large enough to be easily read. 633 | # You don't want your viewers squinting to read your plot. 634 | startrange = 0.01 635 | endrange = 30.0 636 | # plt.xticks([startrange, endrange], fontsize=14) 637 | # plt.yticks(fontsize=14) 638 | 639 | binwidth = 0.05 640 | bins = np.logspace(np.log10(startrange), np.log10(endrange), 300) # 641 | 642 | def plot_cusum(json_histogram, bins, ax, color): 643 | 644 | items = [(int(k), int(v)) for k, v in json_histogram["occurrences"].items()] 645 | count_sum = sum([count for (ms, count) in items]) 646 | 647 | items = [ 648 | (ms / 1000.0, count) if ms > 10 else ((10 / 1000.0), count) 649 | for (ms, count) in items 650 | ] 651 | val, weight = zip(*items) 652 | height, bins = np.histogram(val, weights=weight, bins=bins) 653 | height = np.cumsum(height) 654 | ax.step( 655 | bins[:-1], 656 | height, 657 | "k", 658 | linestyle="-", 659 | linewidth=1, 660 | where="post", 661 | color=color, 662 | ) 663 | ax.bar( 664 | bins[:-1], 665 | height, 666 | width=np.diff(bins), 667 | linewidth=0, 668 | facecolor=color, 669 | alpha=0.3, 670 | align="edge", 671 | ) 672 | 673 | return count_sum 674 | 675 | count_sum1 = plot_cusum(statistics["raw_distance_histogram"], bins, ax, "red") 676 | 677 | count_sum2 = plot_cusum( 678 | statistics["sync_to_video_distance_histogram"], bins, ax, "orange" 679 | ) 680 | assert count_sum1 == count_sum2 681 | 682 | count_sum3 = plot_cusum( 683 | statistics["sync_to_sub_distance_histogram"], bins, ax, "green" 684 | ) 685 | assert count_sum1 == count_sum3 686 | 687 | count_sum = count_sum1 688 | 689 | step = count_sum // 6 690 | step = int(round(step, int(-math.log10(step)))) 691 | 692 | plt.yticks(list(range(0, count_sum - 1 - step, step)) + [count_sum], fontsize=10) 693 | # 
ax.axvline(x=0.2,color='black',linewidth=0.8,linestyle='dotted') 694 | ax.axhline(y=count_sum, color="black", linewidth=0.8, linestyle="dotted", xmin=0.2) 695 | 696 | ax.legend( 697 | [ 698 | "Without synchronization", 699 | "Synchronized to audio", 700 | "Synchronized to subtitle", 701 | ] 702 | ) 703 | ax.set( 704 | xlabel="Time distance of lines between reference subtitle and incorrect subtitles", 705 | ylabel="Frequency (cumulative sum)", 706 | ) 707 | ax.set_xscale("log") 708 | ax.set_xlim((startrange, endrange)) 709 | ax.set_ylim((0, count_sum * 1.1)) 710 | 711 | def format_func(value, tick_number): 712 | if value < 1: 713 | return "{}ms".format(int(value * 1000)) 714 | else: 715 | return "{}s".format(int(value)) 716 | 717 | ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func)) 718 | 719 | # plt.yscale('log') 720 | # plt.xscale('log') 721 | 722 | plt.savefig(os.path.join(output_dir, "distance-histogram." + extension), bbox_inches='tight') 723 | 724 | # plt.show() 725 | plt.close() 726 | -------------------------------------------------------------------------------- /statistics-helpers/list-all-subtitles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import json 3 | import os 4 | import datetime 5 | import copy 6 | 7 | print('start load') 8 | with open('generated-data/1-database/database.json', 'r') as f: 9 | database = json.load(f) 10 | print('end load') 11 | 12 | for movie_data in database['movies']: 13 | print() 14 | print('%s ref %s' % (movie_data['id'], movie_data['reference_subtitle']['id'])) 15 | for sub_data in movie_data['subtitles']: 16 | print('%s %s' % (movie_data['id'], sub_data['id'])) 17 | 18 | #import time 19 | #time.sleep(2) 20 | #print(sub_data['id']) 21 | #time.sleep(2) 22 | #print(orig_sub_data['data']) 23 | #time.sleep(2) 24 | 25 | 26 | #walle['reference_subtitle']['data'] = new_movie_ref_sub_data 27 | #walle['reference_subtitle']['id'] = '%s,%s' % 
(walle['reference_subtitle']['id'], move) 28 | -------------------------------------------------------------------------------- /statistics-helpers/plots_from_videolist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | PREFIX=./generated-data 3 | 4 | export VIDEOLIST_FILE="./test_data/videos.list" 5 | export CACHE_DIR="$PREFIX/cache" 6 | export DATABASE_DIR="$PREFIX/1-database" 7 | export STATISTICS_DIR="$PREFIX/2-statistics" 8 | export PLOTS_DIR="$PREFIX/3-plots" 9 | 10 | 11 | skip_database=1 12 | skip_statistics=0 13 | skip_plots=0 14 | clean_cache=0 15 | 16 | while [ "$1" != "" ]; do 17 | case $1 in 18 | --no-skip-database) skip_database=0 ;; 19 | --skip-database) skip_database=1 ;; 20 | --skip-statistics) skip_statistics=1 ;; 21 | --skip-plots) skip_plots=1 ;; 22 | --clean-cache) clean_cache=1 ;; 23 | * ) echo "Unexpected input flag '$1'! Exiting." 24 | exit 1 25 | esac 26 | shift 27 | done 28 | 29 | if [ "$clean_cache" = "1" ]; then 30 | echo "Cleaning statistics cache requested otherwise by user!" 31 | rm "$CACHE_DIR" -rf 32 | fi 33 | 34 | 35 | 36 | echo '======================================================================' 37 | echo "Generating database..." 38 | echo '======================================================================' 39 | 40 | 41 | if [ "$skip_database" = "1" ]; then 42 | echo "Skipping database creation as not requested otherwise by user!" 43 | else 44 | python3.7 ./statistics-helpers/generate_database_from_videolist.py --videolist-file "$VIDEOLIST_FILE" --database-dir "$DATABASE_DIR" 45 | echo "Generating database done!" 46 | fi 47 | 48 | echo 49 | echo 50 | 51 | echo '======================================================================' 52 | echo "Generating statistics..." 53 | echo '======================================================================' 54 | 55 | if [ "$skip_statistics" = "1" ]; then 56 | echo "Skipping statistics generation as requested by user!" 
57 | else 58 | cargo run \ 59 | --example=generate_statistics_from_database \ 60 | --release \ 61 | -- \ 62 | --database-dir "$DATABASE_DIR" --statistics-dir "$STATISTICS_DIR" --cache-dir "$CACHE_DIR" \ 63 | --split-penalties 0.05,0.1,0.25,0.5,1,2,3,4,5,6,7,8,9,10,20,30,50,100,1000 \ 64 | --optimization-values 0.05,0.1,0.2,0.5,1,2,3,4,5,7,10,15,20,25,30 \ 65 | --default-split-penalty 6 \ 66 | --default-min-span-length 500 \ 67 | --min-span-lengths 0,100,200,300,400,500,600,800,1000,1250,1500,2000 \ 68 | --default-optimization 2 \ 69 | --default-max-good-sync-offset 300,500,1000,1300 \ 70 | --default-required-good-sync-spans-percentage 25,70,95,99 \ 71 | --num-threads 2 \ 72 | --only-every-nth-sub 1 \ 73 | #--only-transient-statistics 74 | #--only-general-statistics \ 75 | echo "Generating statistics done!" 76 | fi 77 | 78 | echo 79 | echo 80 | 81 | echo '======================================================================' 82 | echo "Generating plots in '${PLOTS_DIR}'..." 83 | echo '======================================================================' 84 | if [ "$skip_plots" = "1" ]; then 85 | echo "Skipping plots generation as requested by user!" 86 | else 87 | python3.7 ./statistics-helpers/generate_plots_from_statistics.py --statistics-dir "$STATISTICS_DIR" --plots-dir "$PLOTS_DIR" 88 | echo "Generating plots in '${PLOTS_DIR}' done!" 89 | fi 90 | 91 | -------------------------------------------------------------------------------- /statistics-helpers/worst_movies.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | with open('generated-data/2-statistics/statistics.json', 'r') as f: 4 | data = json.load(f) 5 | 6 | data2 = [x for x in data['offset_by_subtitle']] 7 | 8 | wo = sorted(data2, key=lambda x: x['video_sync_offsets']['perc99']) 9 | print(json.dumps(wo, indent=3)) 10 | --------------------------------------------------------------------------------