#!/usr/bin/env bash
#
# movie-dialog-summarizer
# Cuts movie dialog summary video from video file and subtitles.
#
# Repository layout:
#   .gitignore                  (ignores /in, /tmp, /out, /.idea)
#   README.md
#   summarize-movie-dialog.sh   (this script)
#
# Examples:
#   - The Shawshank Redemption      https://youtu.be/FZdDk7A4t1A
#   - The Godfather Part I          https://youtu.be/6pY6qu0AZ2Y
#   - The Dark Knight               https://youtu.be/Px-f24xC0q0
#   - Schindler's List              https://youtu.be/3JdCfARm4IQ
#   - 12 Angry Men                  https://youtu.be/dbfdSBm9jmU
#   - The Fellowship Of The Ring    https://youtu.be/75M5UUtTUgA
#
# Technologies:
#   - LexRank  https://en.wikipedia.org/wiki/Automatic_summarization#TextRank_and_LexRank
#   - Sumy     https://github.com/miso-belica/sumy
#   - ffmpeg   https://github.com/FFmpeg/FFmpeg
#
# Usage:
#   ./summarize-movie-dialog.sh [video file] [subtitles ".srt" file] [-fast]
#
#   -fast   cut the clips with stream copy ("-c copy") instead of re-encoding.
#
# Intermediate files are written to ./tmp and the result to
# ./out/<video file name>-summary.mp4, both relative to the current directory.
#
# Requires: bash, GNU date (uses --date), iconv, ffmpeg, sumy.
#
# Globals shared between the functions below:
#   movie      path of the input video                      (set by main)
#   name       basename of the input video, used as prefix  (set by main)
#   subtitles  path of the input .srt file                  (set by main)
#   OPTIONS    array of extra ffmpeg options for clips      (set by main)
#   line, length, position, end, match
#              state of the summary line currently being located in the
#              subtitles (set by match / matchStart / matchEnd)

set -ue

# Prints the timestamp lines ("HH:MM:SS.mmm __> HH:MM:SS.mmm") that directly
# precede the subtitle text lines containing ${match}.
# sanitizeSubtitles turned "-->" into "__>", so the end stamp is still field 3.
getStamps() {
  grep -F -B1 -- "${match}" "tmp/${name}-subtitles.txt" |
    grep '^[0-9][0-9]:[0-9][0-9]' |
    tr ',' '.'
}

# Prints the start timestamp of the first subtitle containing ${match}.
# Only the first hit is used: several hits would yield a multi-line value
# that date(1) cannot parse.
getStart() {
  getStamps | awk 'NR == 1 { print $1 }'
}

# Prints the end timestamp of the first subtitle containing ${match}.
getEnd() {
  getStamps | awk 'NR == 1 { print $3 }'
}

# Adds a (possibly negative) number of seconds to a time of day.
# Arguments: $1 - time as HH:MM:SS[.fraction]
#            $2 - integer seconds to add
# Outputs:   the resulting time as HH:MM:SS (the fraction is dropped)
# Returns:   non-zero if date(1) cannot parse $1
addSeconds() {
  local base
  # The calendar date is arbitrary; it only anchors the time of day.
  base="$(date --date "2015-01-01 ${1}" +%s)" || return 1
  date --date "@$((base + $2))" +%H:%M:%S
}

# Load-time sanity check: abort early if date(1) does not behave like GNU date.
if ! addSeconds "01:03:22.777" "1" | grep -q '01:03:23'; then
  echo "addSeconds self-check failed (GNU date required)" >&2
  exit 1
fi

# Finds the longest prefix of ${line} that occurs in the sanitized subtitles
# and prints the start time of that subtitle, moved one second earlier.
# Leaves the prefix in ${match} and its length in ${end}.
# Exits with status 1 if not even a one-character prefix can be found.
matchStart() {
  end=${length}
  while [[ "${end}" != "${position}" ]]; do
    match="${line:position:end}"
    if grep -qF -- "${match}" "tmp/${name}-subtitles.txt"; then
      addSeconds "$(getStart)" "-1"
      return
    fi
    end=$((end - 1))
  done
  # Diagnostics go to stderr: stdout is captured by the caller's $(...).
  echo "Failed to match start of line: $line" >&2
  exit 1
}

# Prints the time at which the clip for ${line} should stop: the end time of
# the subtitle holding the end of the line, plus three seconds.
# Exits with status 1 if no suffix of the line can be found.
matchEnd() {
  # Re-run matchStart in this shell to recover ${end} and ${match}; the
  # caller's first invocation ran inside $(...) and lost them.
  matchStart > /dev/null || exit 1
  if [[ "${end}" == "${length}" ]]; then
    # The whole line lives in one subtitle; use that subtitle's end stamp.
    addSeconds "$(getEnd)" "3"
    return
  fi
  # Otherwise search for the longest suffix that starts after the matched
  # prefix (one character is skipped, as in the original implementation).
  position=$((end + 1))
  end=${length}
  while [[ "${end}" != "${position}" ]]; do
    match="${line:position}"
    if grep -qF -- "${match}" "tmp/${name}-subtitles.txt"; then
      addSeconds "$(getEnd)" "3"
      return
    fi
    position=$((position + 1))
  done
  echo "Failed to match end of line: $line" >&2
  exit 1
}

# Locates every summary sentence in the subtitles and appends
# "<start> <finish> <sentence>" to tmp/<name>-extracts.txt.
match() {
  local line length position start finish end match
  while read -r line || [[ -n "${line}" ]]; do
    # A blank line cannot be located in the subtitles; skip it.
    [[ -n "${line}" ]] || continue
    length="${#line}"
    position=0
    start="$(matchStart)" || exit 1
    finish="$(matchEnd)" || exit 1
    echo "${start} ${finish} ${line}" >> "tmp/${name}-extracts.txt"
  done < "tmp/${name}-dialogs.utf.summary.txt"
}

# Cuts one clip per extract (in chronological order) out of ${movie} and
# concatenates the clips into out/<name>-summary.mp4.
# Prints "<start> <finish>" for every clip as progress output.
extract() {
  local lineId=0 start finish clip listed
  local quote="'" escapedQuote="'\\''"
  if [[ ! -s "tmp/${name}-extracts.txt" ]]; then
    echo "No dialog extracts found for: ${name}" >&2
    exit 1
  fi
  sort "tmp/${name}-extracts.txt" > "tmp/${name}-extracts-sorted.txt"
  while read -r start finish _; do
    echo "$start $finish"
    clip="tmp/${name}-summary${lineId}.mp4"
    # The concat list quotes paths with '...'; an apostrophe inside the path
    # has to be written as '\'' to stay valid.
    listed="${PWD}/${clip}"
    listed=${listed//"$quote"/"$escapedQuote"}
    echo "file '${listed}'" >> "tmp/${name}-concate.txt"
    # stdin is redirected so ffmpeg cannot swallow the loop's input.
    ffmpeg -nostats -loglevel panic -i "$movie" -ss "${start}" -to "${finish}" \
      -map_metadata -1 "${OPTIONS[@]}" "$clip" < /dev/null
    lineId=$((lineId + 1))
  done < "tmp/${name}-extracts-sorted.txt"
  # -safe 0: the list contains absolute paths, which the concat demuxer
  # rejects in safe mode.
  ffmpeg -nostats -loglevel panic -f concat -safe 0 \
    -i "tmp/${name}-concate.txt" -c copy "out/${name}-summary.mp4" < /dev/null
}

# Converts ${subtitles} into tmp/<name>-subtitles.txt: UTF-8, text in
# parentheses and markup tags removed, '-' replaced by '_', sequence numbers
# dropped, and the text of every subtitle joined onto a single line that
# follows its timestamp line.
sanitizeSubtitles() {
  local line
  iconv -c -t UTF-8 < "$subtitles" |
    # Triple dot overuse causes trouble.
    #sed 's/\.\+/\./g' |
    sed 's/([^)]*)//g' |
    sed 's/<[^>]*>//g' |
    tr '-' '_' | tr -d '\r' | grep -v '^[0-9]*$' |
    while read -r line || [[ -n "${line}" ]]; do
      if [[ "${line}" =~ ^[0-9] ]]; then
        # Timestamp line: finish the previous text line, then print it.
        printf '\n%s\n' "${line}"
      else
        # Text line: append to the current subtitle's single output line.
        printf '%s ' "${line}"
      fi
    done |
    grep -v '^[ ]*$' > "tmp/${name}-subtitles.txt"
}

# Extracts the dialog text (every line not starting with a digit) and
# writes a 15-sentence LexRank summary of it to
# tmp/<name>-dialogs.utf.summary.txt.
summarize() {
  local sumy
  sumy=~/.local/bin/sumy
  # Fall back to a sumy found on PATH when it is not installed per-user.
  if [[ ! -x "${sumy}" ]]; then
    sumy=sumy
  fi
  grep -v '^[0-9]' "tmp/${name}-subtitles.txt" > "tmp/${name}-dialogs.utf.txt"
  "${sumy}" lex-rank --length=15 --file "tmp/${name}-dialogs.utf.txt" \
    > "tmp/${name}-dialogs.utf.summary.txt"
}

# Entry point.
# Arguments: $1 - video file
#            $2 - subtitles ".srt" file
#            $3 - optional "-fast" to cut clips with stream copy
main() {
  if (($# < 2)); then
    echo "Usage: $0 [video file] [subtitles \".srt\" file] [-fast]" >&2
    exit 1
  fi
  movie="$1"
  name="$(basename -- "$1")"
  subtitles="$2"
  if [[ "$#" -eq 3 && "$3" == "-fast" ]]; then
    OPTIONS=(-c copy)
  else
    OPTIONS=(-strict -2)
  fi
  mkdir -p tmp out
  # Remove leftovers of a previous run for the same movie.
  rm -f -- "tmp/${name}-"* "out/${name}-summary.mp4"
  sanitizeSubtitles
  summarize
  match
  extract
}

main "$@"