├── images └── submerger.png ├── README.md ├── LICENSE └── submerger.lua /images/submerger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daxliar/submerger/HEAD/images/submerger.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SubMerger - SRT Subtitles Merger 2 | 3 | Lua script to merge two input subtitles in SRT format into a new SRT file. 4 | This tool is meant to help people who are learning new languages and need to have the two translations on screen at the same time. 5 | Here is an example of how VLC would play back the generated file (colors are configurable). 6 | 7 | ![GitHub Logo](/images/submerger.png) 8 | 9 | ## Usage 10 | 11 | It requires first the two input subtitle files in SRT format, then an output SRT file. The two HTML color codes are optional. 12 | 13 | ```bash 14 | $ ./submerger.lua <input1.srt> <input2.srt> <output.srt> [html color code 1] [html color code 2] 15 | ``` 16 | 17 | ### Example 18 | 19 | ```bash 20 | $ ./submerger.lua first_language.srt second_language.srt merged.srt 21 | Imported 620 blocks from "first_language.srt" 22 | Imported 587 blocks from "second_language.srt" 23 | Written 514 blocks to "merged.srt" 24 | ``` 25 | 26 | ## Requirements 27 | 28 | Lua 5.1 or later is required. 29 | 30 | ## TODO 31 | 32 | * Make this command line tool a VLC addon. 
33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /submerger.lua: -------------------------------------------------------------------------------- 1 | #! 
-- Run with: /usr/bin/env lua

-- Default colors to use when writing the merged values.
-- They have to be valid HTML color codes, as used inside a <font color="..."> tag.
local default_color_a = "white"
local default_color_b = "yellow"

-- Utility to convert an SRT timestamp in the format "00:00:00,000" to a number.
-- @param timestamp  string containing an "HH:MM:SS,mmm" timestamp
-- @return           the time in seconds (fractional part = milliseconds)
-- Raises an error (reported at the caller's position) when the argument is
-- not a string or does not contain a timestamp in that format.
function str_timestamp_to_seconds( timestamp )
    -- All intermediates are local so that nothing leaks into the global table.
    local hours, minutes, seconds, milliseconds
    if type(timestamp) == "string" then
        hours, minutes, seconds, milliseconds = string.match( timestamp, "(%d+):(%d+):(%d+),(%d+)" )
    end
    if hours == nil then
        error( "invalid SRT timestamp: " .. tostring(timestamp), 2 )
    end
    -- Dividing by 1000 (instead of multiplying by 0.001) keeps values such as
    -- ",250" or ",500" exact in binary floating point.
    return tonumber(hours) * 3600 + tonumber(minutes) * 60 + tonumber(seconds) + tonumber(milliseconds) / 1000
end

-- Utility to convert a number of seconds to an SRT timestamp in the format "00:00:00,000".
-- @param seconds  number (or numeric string) of seconds; negative values are clamped to 0
-- @return         string formatted as "HH:MM:SS,mmm"
-- Raises an error when the argument cannot be converted to a number.
function seconds_to_str_timestamp( seconds )
    local value = tonumber(seconds)
    if value == nil then
        error( "invalid number of seconds: " .. tostring(seconds), 2 )
    end
    if value < 0 then
        value = 0
    end
    -- Round once, on the total number of milliseconds, so that a fractional
    -- part like 0.9996 carries into the seconds instead of printing ",1000".
    local total_ms = math.floor( value * 1000 + 0.5 )
    local ms = total_ms % 1000
    local total_seconds = (total_ms - ms) / 1000
    local secs = total_seconds % 60
    local mins = math.floor( total_seconds / 60 ) % 60
    local hrs = math.floor( total_seconds / 3600 )
    return string.format( "%02.0f:%02.0f:%02.0f,%03.0f", hrs, mins, secs, ms )
end

-- Tiny utility function to trim a string.
-- @param s  string to trim
-- @return   s without leading and trailing whitespace
function trim(s)
    -- The outer parentheses drop gsub's second result (the substitution count).
    return (s:gsub("^%s*(.-)%s*$", "%1"))
end

-- Generate a subtitle entry with times and text.
-- An empty 'translated' table is also inserted, to be used later on.
--
-- generate_entry builds one subtitle entry: a table with the fields
-- start_time and end_time (in seconds), text and translated.
-- It takes timestamps in both formats: numbers are stored unchanged, any
-- other value is converted with str_timestamp_to_seconds.
-- @param in_start_time  number of seconds or "HH:MM:SS,mmm" string
-- @param in_end_time    number of seconds or "HH:MM:SS,mmm" string
-- @param in_text        text of the subtitle block
-- @return               the new entry table
function generate_entry( in_start_time, in_end_time, in_text )
    local function to_seconds( value )
        if type(value) == "number" then
            return value
        end
        return str_timestamp_to_seconds(value)
    end
    return {
        start_time = to_seconds(in_start_time),
        end_time = to_seconds(in_end_time),
        text = in_text,
        translated = {},
    }
end

-- Main loop to open an SRT file and return a table with all the elements from the file.
-- generate_entry is used for every block created; the text lines of a block
-- are trimmed and joined with a single space.
-- @param filename  path of the SRT file to read
-- @return          sequence of entries (empty when the file cannot be opened)
function import_srt( filename )
    local srts = {}
    -- Test the handle itself: io.open returns nil (plus a message) on failure.
    local file = io.open(filename, "r")
    if not file then
        print("Error: Can't import file \"" .. tostring(filename) .. "\" for reading!")
        return srts
    end

    local line_type = "index"
    local text_lines = {}
    local current_start_time = nil
    local current_end_time = nil

    -- Store the block collected so far and reset the per-block state.
    local function flush_block()
        if current_start_time ~= nil and current_end_time ~= nil then
            srts[#srts + 1] = generate_entry( current_start_time, current_end_time, table.concat(text_lines, " ") )
        else
            print("Warning: skipped a block with an invalid time line in \"" .. tostring(filename) .. "\"")
        end
        text_lines = {}
        current_start_time, current_end_time = nil, nil
    end

    for line in file:lines() do
        local trimmed_line = trim(line)
        if line_type == "index" then
            -- First comes the index line; its value is not needed. Extra blank
            -- lines between blocks are skipped instead of being taken as an index.
            if trimmed_line ~= "" then
                line_type = "time"
            end
        elseif line_type == "time" then
            -- Then get the time interval.
            current_start_time, current_end_time = string.match( line, "(%d+:%d+:%d+,%d+) --.* (%d+:%d+:%d+,%d+)" )
            line_type = "text"
        else
            -- And finally get all the lines of text, until an empty line
            -- closes the block.
            if trimmed_line == "" then
                flush_block()
                line_type = "index"
            else
                text_lines[#text_lines + 1] = trimmed_line
            end
        end
    end
    -- A file that does not end with a blank line still has one pending block.
    if line_type == "text" then
        flush_block()
    end

    file:close()
    print("Imported " .. tostring(#srts) .. " blocks from \"" .. tostring(filename) .. "\"")
    return srts
end

-- Check whether two time intervals overlap.
-- Epsilon is used to make every range a bit more 'fat' to help overlapping.
-- @param epsilon  widening applied to both ends of both intervals (defaults to 0)
-- @return         true when the widened intervals overlap, false otherwise
function do_intervals_overlap( start_time_a, end_time_a, start_time_b, end_time_b, epsilon )
    local margin = epsilon or 0
    -- The test is symmetric in A and B, so a single pair of comparisons is enough.
    return (start_time_b - margin < end_time_a + margin) and (end_time_b + margin > start_time_a - margin)
end

-- Extract the concatenated string of all the texts from srt using indexes.
-- @param srt      sequence of entries (nil gives an empty string)
-- @param indexes  sequence of positions inside srt
-- @return         the selected texts joined with a single space
function extract_text_from_indexes( srt, indexes )
    local final_text = ""
    if srt ~= nil then
        for _, index in ipairs(indexes) do
            if final_text == "" then
                final_text = srt[index]["text"]
            else
                final_text = final_text .. " " .. srt[index]["text"]
            end
        end
    end
    return final_text
end

-- Given indexes return the minimum start time and maximum end time from the blocks in srt.
-- @param srt      sequence of entries (nil gives 0.0, 0.0)
-- @param indexes  sequence of positions inside srt
-- @return         start_time, end_time (both 0.0 when nothing is selected)
function extract_start_end_time_from_indexes( srt, indexes )
    -- nil marks "not set yet"; using 0.0 as the marker would discard a block
    -- that really starts at 0.0 as soon as a later block is examined.
    local start_time = nil
    local end_time = nil
    if srt ~= nil then
        for _, index in ipairs(indexes) do
            local current_start_time = srt[index]["start_time"]
            local current_end_time = srt[index]["end_time"]
            if start_time == nil or current_start_time < start_time then
                start_time = current_start_time
            end
            if end_time == nil or current_end_time > end_time then
                end_time = current_end_time
            end
        end
    end
    return start_time or 0.0, end_time or 0.0
end

-- Merge two strings and create a valid html string with the two coloured texts one after the other.
-- @param color_a, color_b  optional HTML color codes; default_color_a and
--                          default_color_b are used when they are omitted
-- @return  text_a and text_b, each wrapped in its own <font> tag, on two lines
function merge_srt_texts( text_a, text_b, color_a, color_b )
    local selected_color_a = tostring(color_a or default_color_a)
    local selected_color_b = tostring(color_b or default_color_b)
    -- One placeholder per argument: color, text, color, text.
    return string.format( "<font color=\"%s\">%s</font>\n<font color=\"%s\">%s</font>",
        selected_color_a, text_a, selected_color_b, text_b )
end

-- Given two tables with the blocks read from the two srt files return a unique table with the new merged srt blocks.
-- The returned element has the same structure as the two inputs.
-- The timing of every merged block is taken from the blocks of srt_a only.
function merge_srts( srt_a, srt_b )

    local srts = {}
    local epsilon = 0.0
    local last_previous_overlap = 0
    local inserted_index = 0
    local overlaps = {}

    -- First create a table with all the overlaps.
    -- Each overlap has entries with indexes from A 'srt_a_indexes' and B 'srt_b_indexes'.
    for index_a, entry_a in ipairs(srt_a) do

        local current_overlaps = {}
        local removed_previous = false

        for index_b, entry_b in ipairs(srt_b) do
            if do_intervals_overlap( entry_a["start_time"], entry_a["end_time"], entry_b["start_time"], entry_b["end_time"], epsilon ) then
                table.insert( current_overlaps, index_b )
                last_previous_overlap = index_b

                -- The first overlapping B index is dropped once, as soon as a
                -- second one is found.
                -- NOTE(review): presumably because it already belongs to the
                -- previous A block - confirm.
                if not removed_previous and #current_overlaps == 2 then
                    table.remove( current_overlaps, 1 )
                    removed_previous = true
                end
            end
        end

        -- The current A block joins the last generated group when that group
        -- already contains the most recently seen overlapping B index.
        local insert_in_open_block = false
        if inserted_index > 0 then
            for _, index_b in ipairs( overlaps[inserted_index]["srt_b_indexes"] ) do
                if index_b == last_previous_overlap then
                    insert_in_open_block = true
                    break
                end
            end
        end

        if insert_in_open_block then
            table.insert( overlaps[inserted_index]["srt_a_indexes"], index_a )
        else
            table.insert( overlaps, { srt_a_indexes = { index_a }, srt_b_indexes = current_overlaps } )
            inserted_index = inserted_index + 1
        end

    end

    -- Generate the final blocks from the overlaps.
    for _, overlap in ipairs(overlaps) do
        local srt_a_indexes = overlap["srt_a_indexes"]
        local srt_b_indexes = overlap["srt_b_indexes"]

        local start_time, end_time = extract_start_end_time_from_indexes( srt_a, srt_a_indexes )

        local text_a = extract_text_from_indexes( srt_a, srt_a_indexes )
        local text_b = extract_text_from_indexes( srt_b, srt_b_indexes )

        table.insert( srts, generate_entry( start_time, end_time, merge_srt_texts( text_a, text_b ) ) )
    end
    return srts
end

-- Writes srt data to a file.
-- Blocks are numbered from 1 in sequence order.
-- @param filename  path of the SRT file to create
-- @param srt       sequence of entries to write
function write_srt( filename, srt )
    if srt ~= nil then
        local file = io.open(filename, "w")
        if file ~= nil then
            for index, entry in ipairs(srt) do
                file:write(string.format( "%s\n", tostring(index) ))
                local start_time = seconds_to_str_timestamp(entry["start_time"])
                local end_time = seconds_to_str_timestamp(entry["end_time"])
                file:write(string.format( "%s --> %s\n", start_time, end_time ))
                file:write(string.format( "%s\n\n", entry["text"] ))
            end
            file:close()
            print("Written " .. tostring(#srt) .. " blocks to \"" .. tostring(filename) .. "\"")
        else
            print("Error: Can't open file \"" .. tostring(filename) .. "\" for writing!")
        end
    else
        print("Error: Nothing to write in the output srt file!")
    end
end

-- Check if running as library or as a program:
-- debug.getlocal only succeeds when stack level 4 exists.
local script_name = (arg and arg[0]) or "submerger.lua"
if pcall(debug.getlocal, 4, 1) then
    print("You are using " .. script_name .. " as a library")
else
    local args = arg or {}
    local num_args = #args
    -- Three mandatory file names plus up to two optional colors.
    if num_args >= 3 and num_args <= 5 then

        default_color_a = args[4] or default_color_a
        default_color_b = args[5] or default_color_b

        write_srt( args[3], merge_srts( import_srt(args[1]), import_srt(args[2]) ) )
    else
        print( "Usage: " .. script_name .. " <input1.srt> <input2.srt> <output.srt> [html color code 1] [html color code 2]")
    end
end