├── images
└── submerger.png
├── README.md
├── LICENSE
└── submerger.lua
/images/submerger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daxliar/submerger/HEAD/images/submerger.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SubMerger - SRT Subtitles Merger
2 |
3 | Lua script to merge two input subtitles in SRT format in a new SRT file.
4 | This tool is meant to help people that are learning new languages and would need to have on screen the two translations at the same time.
5 | Here is an example of how VLC would playback the file generated (colors are configurable).
6 |
7 | ![Merged subtitles played back in VLC](images/submerger.png)
8 |
9 | ## Usage
10 |
11 | It takes the two input subtitle files in SRT format first, then the output SRT file, optionally followed by two HTML color codes.
12 |
13 | ```bash
14 | $ ./submerger.lua <first.srt> <second.srt> <merged.srt> [html color code 1] [html color code 2]
15 | ```
16 |
17 | ### Example
18 |
19 | ```bash
20 | $ ./submerger.lua first_language.srt second_language.srt merged.srt
21 | Imported 620 blocks from "first_language.srt"
22 | Imported 587 blocks from "second_language.srt"
23 | Written 514 blocks to "merged.srt"
24 | ```
25 |
26 | ## Requirements
27 |
28 | Lua 5.1 or later is required.
29 |
30 | ## TODO
31 |
32 | * Make this command line tool a VLC addon.
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/submerger.lua:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env lua
2 |
-- Default colors to use when writing the merged values.
-- They have to be valid HTML color codes (or color names), as used inside
-- the color attribute of the <font> tag.
-- default_color_a is applied to the text of the first input file and
-- default_color_b to the text of the second one; both can be overridden
-- from the command line (4th and 5th argument).
local default_color_a = "white"
local default_color_b = "yellow"
7 |
--- Convert an SRT timestamp in the format "00:00:00,000" to seconds.
-- The first "H:M:S,ms" group found anywhere in the input is used.
-- @param timestamp string containing hours:minutes:seconds,milliseconds
-- @return number of seconds, fractional part included
-- Raises an error when the input contains no timestamp.
function str_timestamp_to_seconds( timestamp )
    -- Declared local so the four captures do not leak into the global
    -- environment of whoever loads this file.
    local hours, minutes, seconds, milliseconds =
        string.match( timestamp, "(%d+):(%d+):(%d+),(%d+)" )
    if not hours then
        -- Fail with a readable message instead of an arithmetic-on-nil error.
        error( "invalid SRT timestamp: " .. tostring(timestamp), 2 )
    end
    -- Dividing by 1000 is correctly rounded, unlike multiplying by the
    -- inexact constant 0.001.
    return tonumber(hours) * 3600
        + tonumber(minutes) * 60
        + tonumber(seconds)
        + tonumber(milliseconds) / 1000
end
17 |
--- Convert a number of seconds to an SRT timestamp "00:00:00,000".
-- @param seconds number (or numeric string) of seconds
-- @return string formatted as hours:minutes:seconds,milliseconds
function seconds_to_str_timestamp( seconds )
    -- Round once, on the total number of milliseconds, and only then split
    -- into fields. Rounding the fractional part on its own can produce
    -- ",1000" (e.g. for 1.9996) without carrying into the seconds field.
    local total_ms = math.floor( tonumber(seconds) * 1000 + 0.5 )
    local total_hours = math.floor( total_ms / 3600000 )
    local total_minutes = math.floor( total_ms / 60000 ) % 60
    local total_seconds = math.floor( total_ms / 1000 ) % 60
    local milliseconds = total_ms % 1000
    return string.format( "%02d:%02d:%02d,%03d",
        total_hours, total_minutes, total_seconds, milliseconds )
end
26 |
--- Strip leading and trailing whitespace from a string.
-- @param s string to clean up
-- @return the string without surrounding whitespace (a single value)
function trim(s)
    -- gsub also returns the substitution count; keeping only the first
    -- result in a local discards it.
    local stripped = s:gsub("^%s*(.-)%s*$", "%1")
    return stripped
end
31 |
--- Build a subtitle entry from a time interval and a text.
-- Each time may be given either as a number of seconds or as an SRT
-- timestamp string; strings are converted with str_timestamp_to_seconds.
-- @param in_start_time start of the cue (number or timestamp string)
-- @param in_end_time end of the cue (number or timestamp string)
-- @param in_text text of the cue
-- @return table with fields start_time, end_time, text and an empty
--         'translated' table
function generate_entry( in_start_time, in_end_time, in_text )
    -- Numbers pass through untouched, anything else is parsed as a timestamp.
    local function to_seconds( value )
        if type(value) ~= "number" then
            return str_timestamp_to_seconds(value)
        end
        return value
    end

    local begin_seconds = to_seconds( in_start_time )
    local finish_seconds = to_seconds( in_end_time )

    return {
        start_time = begin_seconds,
        end_time = finish_seconds,
        text = in_text,
        translated = {},
    }
end
49 |
--- Read an SRT file and return a list with all the blocks it contains.
-- Every block is built with generate_entry; the lines of a multi-line cue
-- are trimmed and joined with a single space.
-- @param filename path of the SRT file to read
-- @return sequence of entries; empty when the file cannot be opened
function import_srt( filename )
    local srts = {}

    -- io.open returns nil when it fails: test the handle itself.
    local file = io.open( filename, "r" )
    if not file then
        print("Error: Can't import file \"" .. tostring(filename) .. "\" for reading!")
        return srts
    end

    -- Parser state: which kind of line is expected next.
    local line_type = "index"
    local current_text = ""
    local current_start_time = nil
    local current_end_time = nil

    -- Store the block collected so far and reset the per-block state.
    local function flush_block()
        if current_start_time and current_end_time then
            table.insert( srts, generate_entry( current_start_time, current_end_time, current_text ) )
        else
            -- The time line did not match "start --> end": report and skip
            -- rather than failing later inside the timestamp conversion.
            print("Warning: skipping block with malformed time interval in \"" .. tostring(filename) .. "\"")
        end
        current_text = ""
        current_start_time, current_end_time = nil, nil
    end

    for line in file:lines() do
        -- trim also removes a trailing "\r" left by CRLF line endings
        local trimmed_line = trim( line )

        if line_type == "index" then
            -- The cue number itself is not needed. Stray blank lines between
            -- blocks are skipped so the parser stays in sync.
            if trimmed_line ~= "" then
                line_type = "time"
            end
        elseif line_type == "time" then
            current_start_time, current_end_time =
                string.match( line, "(%d+:%d+:%d+,%d+) --.* (%d+:%d+:%d+,%d+)" )
            line_type = "text"
        else
            -- text lines, until an empty line closes the block
            if trimmed_line == "" then
                flush_block()
                line_type = "index"
            elseif current_text == "" then
                current_text = trimmed_line
            else
                current_text = current_text .. " " .. trimmed_line
            end
        end
    end

    -- A file that does not end with a blank line still has a pending block.
    if line_type == "text" and current_text ~= "" then
        flush_block()
    end

    file:close()
    print("Imported " .. tostring(#srts) .. " blocks from \"" .. tostring(filename) .. "\"")
    return srts
end
101 |
--- Tell whether two time intervals overlap.
-- Both intervals are widened by epsilon on each side before the test, so
-- ranges that nearly touch can still count as overlapping.
-- @param start_time_a start of interval A
-- @param end_time_a end of interval A
-- @param start_time_b start of interval B
-- @param end_time_b end of interval B
-- @param epsilon amount added on both sides of each interval
-- @return true when the widened intervals overlap, false otherwise
function do_intervals_overlap( start_time_a, end_time_a, start_time_b, end_time_b, epsilon )
    local a_low, a_high = start_time_a - epsilon, end_time_a + epsilon
    local b_low, b_high = start_time_b - epsilon, end_time_b + epsilon
    -- The test is symmetric in A and B: swapping the roles yields the very
    -- same two comparisons, so one pair is enough.
    return b_low < a_high and b_high > a_low
end
108 |
--- Concatenate the texts of the blocks of srt selected by indexes.
-- Texts are joined with a single space, in the order given by indexes.
-- Missing blocks and empty texts are skipped, so they never produce
-- doubled or leading spaces.
-- @param srt sequence of entries (each with a "text" field), may be nil
-- @param indexes sequence of positions inside srt, may be nil
-- @return the joined text, "" when there is nothing to join
function extract_text_from_indexes( srt, indexes )
    if srt == nil or indexes == nil then
        return ""
    end
    local pieces = {}
    -- ipairs guarantees ascending order; pairs gives no ordering promise.
    for _, index in ipairs(indexes) do
        local entry = srt[index]
        local text = entry and entry["text"]
        if text ~= nil and text ~= "" then
            pieces[#pieces + 1] = text
        end
    end
    return table.concat( pieces, " " )
end
123 |
--- Return the minimum start time and the maximum end time of the blocks
-- of srt selected by indexes.
-- @param srt sequence of entries with "start_time" and "end_time", may be nil
-- @param indexes sequence of positions inside srt, may be nil
-- @return start_time, end_time; both 0.0 when no block is selected
function extract_start_end_time_from_indexes( srt, indexes )
    -- nil means "nothing seen yet". Using 0.0 as that marker would make a
    -- cue that really starts at 0 be overwritten by the next, later cue.
    local start_time = nil
    local end_time = nil
    if srt ~= nil and indexes ~= nil then
        for _, index in ipairs(indexes) do
            local entry = srt[index]
            if entry ~= nil then
                local current_start_time = entry["start_time"]
                local current_end_time = entry["end_time"]
                if start_time == nil or current_start_time < start_time then
                    start_time = current_start_time
                end
                if end_time == nil or current_end_time > end_time then
                    end_time = current_end_time
                end
            end
        end
    end
    return start_time or 0.0, end_time or 0.0
end
142 |
--- Merge two texts into one HTML string, each wrapped in its own colored
-- <font> tag, one after the other.
-- @param text_a text taken from the first subtitle file
-- @param text_b text taken from the second subtitle file
-- @param color_a optional HTML color for text_a (default_color_a when nil)
-- @param color_b optional HTML color for text_b (default_color_b when nil)
-- @return the string '<font color="A">text_a</font> <font color="B">text_b</font>'
function merge_srt_texts( text_a, text_b, color_a, color_b )
    local selected_color_a = tostring(color_a or default_color_a)
    local selected_color_b = tostring(color_b or default_color_b)
    -- One %s per argument: two for the colors, two for the texts.
    -- NOTE(review): the single-space separator between the two tags is a
    -- choice made here; switch to "\n" if the two languages should be shown
    -- on separate lines.
    return string.format(
        "<font color=\"%s\">%s</font> <font color=\"%s\">%s</font>",
        selected_color_a, text_a, selected_color_b, text_b )
end
149 |
--- Given the two tables with the blocks read from the two srt files,
-- return a single table with the new merged srt blocks.
-- The returned table has the same structure as the two inputs: a sequence
-- of entries as built by generate_entry. Timing is always taken from srt_a.
-- @param srt_a sequence of entries of the first file
-- @param srt_b sequence of entries of the second file
-- @return sequence of merged entries
function merge_srts( srt_a, srt_b )

    local srts = {}
    -- Tolerance handed to do_intervals_overlap (named so that it does not
    -- shadow the builtin 'error' function).
    local epsilon = 0.0
    local last_previous_overlap = 0
    local inserted_index = 0
    local overlaps = { }

    -- First create a table with all the overlaps.
    -- Each overlap has entries with indexes from A 'srt_a_indexes' and
    -- B 'srt_b_indexes'. The grouping below depends on visiting the cues in
    -- file order, hence ipairs (pairs gives no ordering guarantee).
    for current_index, va in ipairs(srt_a) do

        local current_overlaps = {}
        local removed_previous = false

        for current_overlap_index, vb in ipairs(srt_b) do
            if do_intervals_overlap( va["start_time"], va["end_time"], vb["start_time"], vb["end_time"], epsilon ) then
                table.insert( current_overlaps, current_overlap_index )
                last_previous_overlap = current_overlap_index

                -- When a second overlapping B cue shows up, the first one is
                -- dropped (at most once per A cue).
                -- NOTE(review): presumably because that first B cue already
                -- belongs to the previous block - confirm the intent.
                if removed_previous == false and #current_overlaps == 2 then
                    table.remove( current_overlaps, 1 )
                    removed_previous = true
                end
            end
        end

        -- The current A cue joins the last open block when the most recent
        -- overlapping B index is already part of that block.
        -- NOTE(review): last_previous_overlap is not reset per A cue, so an
        -- A cue with no overlap at all can also be appended to the previous
        -- block - confirm this is wanted.
        local insert_in_open_block = false
        if inserted_index > 0 then
            for _, b_index in ipairs(overlaps[inserted_index]["srt_b_indexes"]) do
                if b_index == last_previous_overlap then
                    insert_in_open_block = true
                    break
                end
            end
        end

        if insert_in_open_block then
            table.insert( overlaps[inserted_index]["srt_a_indexes"], current_index )
        else
            table.insert( overlaps, { srt_a_indexes={ current_index }, srt_b_indexes=current_overlaps } )
            inserted_index = inserted_index + 1
        end

    end

    -- generate final blocks from overlaps
    for _, overlap in ipairs(overlaps) do
        local srt_a_indexes = overlap["srt_a_indexes"]
        local srt_b_indexes = overlap["srt_b_indexes"]

        local start_time, end_time = extract_start_end_time_from_indexes( srt_a, srt_a_indexes )

        local text_a = extract_text_from_indexes( srt_a, srt_a_indexes )
        local text_b = extract_text_from_indexes( srt_b, srt_b_indexes )

        table.insert( srts, generate_entry( start_time, end_time, merge_srt_texts( text_a, text_b ) ) )
    end
    return srts
end
214 |
--- Write srt data to a file in SRT format.
-- Blocks are numbered from 1 in sequence order; every block is written as
-- number, "start --> end" line, text and an empty line.
-- @param filename path of the file to create or overwrite
-- @param srt sequence of entries with start_time, end_time and text
function write_srt( filename, srt )
    if srt == nil then
        print("Error: Nothing to write in the output srt file!")
        return
    end

    local file = io.open( filename, "w" )
    if file == nil then
        print("Error: Can't open file \"" .. tostring(filename) .. "\" for writing!")
        return
    end

    -- ipairs keeps the cue numbers ascending and in file order;
    -- pairs makes no promise about traversal order.
    local written = 0
    for index, entry in ipairs(srt) do
        local start_time = seconds_to_str_timestamp( entry["start_time"] )
        local end_time = seconds_to_str_timestamp( entry["end_time"] )
        file:write( string.format( "%s\n", tostring(index) ) )
        file:write( string.format( "%s --> %s\n", start_time, end_time ) )
        file:write( string.format( "%s\n\n", entry["text"] ) )
        written = index
    end
    file:close()
    print("Written " .. tostring(written) .. " blocks to \"" .. tostring(filename) .. "\"")
end
236 |
-- Check if running as library or as a program.
-- NOTE(review): pcall(debug.getlocal, 4, 1) succeeds only when stack level 4
-- exists; this presumably is the case when the file is loaded through
-- require and not when it is run directly - confirm on the target Lua.
if pcall(debug.getlocal, 4, 1) then
    print("You are using " .. arg[0] .. " as a library")
else
    local num_args = #arg
    -- Three mandatory file names plus up to two optional colors. Both bounds
    -- must hold; with 'or' the test is always true and a call without
    -- arguments would reach io.open with a nil file name.
    if num_args >= 3 and num_args <= 5 then

        default_color_a = arg[4] or default_color_a
        default_color_b = arg[5] or default_color_b

        write_srt( arg[3], merge_srts( import_srt(arg[1]), import_srt(arg[2]) ) )
    else
        print( "Usage: " .. arg[0] .. " <first.srt> <second.srt> <merged.srt> [html color code 1] [html color code 2]")
    end
end
252 |
--------------------------------------------------------------------------------