├── LICENSE
├── README.md
└── mkvextractor.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 dropcreations
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | __mkvExtractor__
4 | =========
5 |
6 | This Python script is a wrapper around __MKVToolNix's mkvextract__ CLI tool.
7 | You can extract content from both __MKV__ and __WebM__ containers.
8 |
9 | ## __Usage__
10 |
11 | - Open a __terminal__ and run the command below.
12 | - You can add one or more files at once.
13 | ```shell
14 | python mkvextractor.py [file_01] [file_02] [file_03]...
15 | ```
16 | - You can also add a folder that includes MKV and WebM files.
17 | - Don't add more than one folder.
18 | ```shell
19 | python mkvextractor.py [folder_path]
20 | ```
21 | - You can extract,
22 | - [__All tracks__](#extract-mode--all-tracks)
23 | - [__Single tracks__](#extract-mode--single-tracks)
24 | - [__Chapters__](#extract-mode--chapters)
25 | - [__Attachments__](#extract-mode--attachments)
26 | - [__Timestamps__](#extract-mode--timestamps)
27 | - [__Cues__](#extract-mode--cues)
28 | - [__Cue Sheets__](#extract-mode--cue-sheets)
29 | - [__Tags__](#extract-mode--tags)
30 |
31 | ### __Extract Mode : All tracks__
32 |
33 | You can extract all video, audio and subtitle tracks available in all inputs.
34 |
35 | ### __Extract Mode : Single tracks__
36 |
37 | In this mode, the script analyzes every input and shows a list of the available tracks; you can then enter the IDs of the tracks you want to extract.
38 | Please separate track IDs with a comma and a space.
39 | - eg: `trackID: 0, 1, 2,...`
40 |
41 | ### __Extract Mode : Chapters__
42 |
43 | You can extract chapters in either XML or OGM format. Enter your choice when prompted.
44 |
45 | ### __Extract Mode : Attachments__
46 |
47 | In this mode, the script also analyzes every input and shows a list of the available attachments; you can then enter the IDs of the attachments you want to extract.
48 | Please separate attachment IDs with a comma and a space.
49 | - eg: `attachmentID: 1, 2, 3,...`
50 |
51 | ### __Extract Mode : Timestamps__
52 |
53 | You can extract timestamps for all tracks at once.
54 |
55 | ### __Extract Mode : Cues__
56 |
57 | You can extract the cues for all available tracks at once.
58 |
59 | ### __Extract Mode : Cue Sheets__
60 |
61 | You can extract the cue sheet from every input that has one.
62 |
63 | ### __Extract Mode : Tags__
64 |
65 | You can extract the tags from every input that has them.
66 |
--------------------------------------------------------------------------------
/mkvextractor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import subprocess
5 |
# Number of command-line arguments, counting the script name itself.
inputCount = len(sys.argv)

# Input paths grouped by container type; inputProcess() appends to these.
mkvList, webmList = [], []
10 |
11 | #process all inputs and get 'mkv' and 'webm' files.
12 |
def inputProcess():
    """Collect the MKV and WebM inputs named on the command line.

    Reads ``sys.argv`` and appends the accepted paths to the module-level
    ``mkvList`` and ``webmList``:

    * no argument: prints a hint and collects nothing;
    * one argument that is a folder: every ``.mkv`` / ``.webm`` file directly
      inside it is collected (sub-folders are not scanned);
    * one argument that is a file: it is collected whatever its extension
      (``.webm`` goes to ``webmList``, anything else to ``mkvList``);
    * one argument that does not exist: prints a message and collects nothing;
    * several arguments: each one ending in ``.mkv`` / ``.webm`` is collected.

    Extensions are matched case-insensitively.
    """
    arguments = sys.argv[1:]
    if not arguments:
        print(f'Please provide inputs...')
        return

    if len(arguments) == 1:
        target = arguments[0]
        if os.path.isdir(target):
            # Keep the folder in each stored path: a bare file name would
            # later be resolved against the current working directory instead
            # of the folder that was passed in.
            candidates = [
                os.path.join(target, entry)
                for entry in os.listdir(target)
                if os.path.isfile(os.path.join(target, entry))
            ]
        elif os.path.isfile(target):
            # A single explicit file is accepted regardless of its extension.
            if os.path.splitext(target)[1].lower() == '.webm':
                webmList.append(target)
            else:
                mkvList.append(target)
            return
        else:
            print(f'Input not found: "{target}"')
            return
    else:
        candidates = arguments

    for candidate in candidates:
        extension = os.path.splitext(candidate)[1].lower()
        if extension == '.mkv':
            mkvList.append(candidate)
        elif extension == '.webm':
            webmList.append(candidate)
31 |
32 | #get stream information in json format.
33 |
def get_output(mediaFile):
    """Identify *mediaFile* with mkvmerge and keep the parsed report.

    Runs ``mkvmerge --identify --identification-format json`` on the absolute
    path of *mediaFile* (stderr discarded) and stores the decoded JSON in the
    module-level ``jsonData``.
    """
    global jsonData
    identify_command = [
        'mkvmerge',
        '--identify',
        '--identification-format',
        'json',
        os.path.abspath(mediaFile),
    ]
    raw_report = subprocess.check_output(identify_command, stderr=subprocess.DEVNULL)
    jsonData = json.loads(raw_report)
47 |
48 | #parse data from json output
49 |
def get_tracks(mediaFile):
    """Publish the details of track number ``i`` of *mediaFile* as globals.

    Refreshes ``jsonData`` through ``get_output`` and then copies the entry at
    position ``int(i)`` of its ``tracks`` list into the module-level names
    ``id``, ``language``, ``language_ietf``, ``title``, ``codec_id``,
    ``codec`` and ``track_type``. ``i`` itself is a module-level value set by
    the caller.
    """
    global id, language, language_ietf, title, codec_id, codec, track_type
    get_output(mediaFile)
    track = jsonData.get('tracks')[int(i)]
    id = track.get('id')
    properties = track.get('properties')
    language = properties.get('language')
    language_ietf = properties.get('language_ietf')
    title = properties.get('track_name')
    codec_id = properties.get('codec_id')
    codec = track.get('codec')
    track_type = track.get('type')
59 |
def get_attachments(mediaFile):
    """Publish the details of attachment number ``i`` of *mediaFile* as globals.

    Refreshes ``jsonData`` through ``get_output`` and then copies the entry at
    position ``int(i)`` of its ``attachments`` list into the module-level
    names ``attach_id``, ``attach_type``, ``attach_desc``, ``attach_name`` and
    ``attach_uid``. ``i`` itself is a module-level value set by the caller.
    """
    global attach_id, attach_type, attach_desc, attach_name, attach_uid
    get_output(mediaFile)
    attachment = jsonData.get('attachments')[int(i)]
    attach_id = attachment.get('id')
    attach_type = attachment.get('content_type')
    attach_desc = attachment.get('description')
    attach_name = attachment.get('file_name')
    attach_uid = attachment.get('properties').get('uid')
67 |
68 | #List available tracks
69 |
def viewTracks(mediaFile):
    """Print the file name of *mediaFile* followed by a summary of each track.

    Walks the global index ``i`` over every track reported by mkvmerge and
    lets ``get_tracks`` load that track's details before printing them.
    """
    global i
    get_output(mediaFile)
    total_tracks = len(jsonData['tracks'])
    print(os.path.basename(mediaFile))
    for i in range(total_tracks):
        get_tracks(mediaFile)
        summary = [
            f'\nTrack ID : {id}',
            f' |',
            f' |--Type : {track_type}',
            f' |--Codec : {codec}',
            f' |--Language : {language}',
            f' |--Language_ietf : {language_ietf}',
            f' |--Title : {title}',
        ]
        print('\n'.join(summary))
84 |
85 | #List available attachments
86 |
def viewAttachments(mediaFile):
    """Print the file name of *mediaFile* followed by a summary of each attachment.

    Walks the global index ``i`` over every attachment reported by mkvmerge
    and lets ``get_attachments`` load that attachment's details before
    printing them.
    """
    global i
    get_output(mediaFile)
    total_attachments = len(jsonData['attachments'])
    print(os.path.basename(mediaFile))
    for i in range(total_attachments):
        get_attachments(mediaFile)
        summary = [
            f'\nAttachment ID : {attach_id}',
            f' |',
            f' |--ContentType : {attach_type}',
            f' |--Filename : {attach_name}',
            f' |--Description : {attach_desc}',
            f' |--UID : {attach_uid}',
        ]
        print('\n'.join(summary))
100 |
101 | #process the input file
102 |
# Ordered (codec-ID fragment, file extension) pairs. The first fragment found
# in the track's codec ID wins, so the order is significant (for example
# "V_REAL" must be tried before the generic "REAL").
_CODEC_EXTENSIONS = (
    ("AVC", ".264"),
    ("HEVC", ".hevc"),
    ("V_VP8", ".ivf"),
    ("V_VP9", ".ivf"),
    ("V_AV1", ".ivf"),
    ("V_MPEG1", ".mpg"),
    ("V_MPEG2", ".mpg"),
    ("V_REAL", ".rm"),
    ("V_THEORA", ".ogg"),
    ("V_MS/VFW/FOURCC", ".avi"),
    ("AAC", ".aac"),
    ("A_AC3", ".ac3"),
    ("A_EAC3", ".eac3"),
    ("ALAC", ".caf"),
    ("DTS", ".dts"),
    ("FLAC", ".flac"),
    ("MPEG/L2", ".mp2"),
    ("MPEG/L3", ".mp3"),
    ("OPUS", ".ogg"),
    ("PCM", ".wav"),
    ("REAL", ".ra"),
    ("TRUEHD", ".thd"),
    ("MLP", ".mlp"),
    ("TTA1", ".tta"),
    ("VORBIS", ".ogg"),
    ("WAVPACK4", ".wv"),
    ("PGS", ".sup"),
    ("ASS", ".ass"),
    ("SSA", ".ssa"),
    ("UTF8", ".srt"),
    ("ASCII", ".srt"),
    ("VOBSUB", ".sub"),
    ("S_KATE", ".ogg"),
    ("USF", ".usf"),
    ("WEBVTT", ".vtt"),
)


def processFile(mediaFile):
    """Build the output file name for the current track into ``extractName``.

    Works on the track most recently loaded by ``get_tracks``: it reads the
    module-level ``id``, ``track_type``, ``language``, ``codec_id``, ``i`` and
    ``jsonData`` and writes the module-level ``extractName``.

    The name is ``TrackID_<id>_[<type>]...[<language>]`` with the pixel
    dimensions added for video and the channel count / sampling frequency
    added for audio, followed by an extension chosen from the codec ID. An
    unrecognised codec ID gets no extension.

    Args:
        mediaFile: Path of the media file. Kept for interface compatibility;
            the track data is taken from ``jsonData``, which ``get_tracks``
            has already refreshed, instead of running mkvmerge again.
    """
    global extractName

    properties = {}
    if track_type in ('video', 'audio'):
        properties = jsonData.get('tracks')[int(i)].get('properties') or {}

    if track_type == 'video':
        pixel_dimensions = properties.get('pixel_dimensions')
        extractName = f'TrackID_{id}_[{track_type}]_[{pixel_dimensions}]_[{language}]'
    elif track_type == 'audio':
        audio_channels = properties.get('audio_channels')
        frequency = properties.get('audio_sampling_frequency')
        # Dividing None would raise TypeError when the report carries no
        # sampling frequency for this track.
        frequency_khz = frequency / 1000 if frequency is not None else 'unknown'
        extractName = f'TrackID_{id}_[{track_type}]_[{audio_channels}CH]_[{frequency_khz}kHz]_[{language}]'
    else:
        # Subtitles and every other track type: always assign a fresh name so
        # the previous track's name (extension included) is never reused.
        extractName = f'TrackID_{id}_[{track_type}]_[{language}]'

    known_codec_id = codec_id or ''
    for fragment, extension in _CODEC_EXTENSIONS:
        if fragment in known_codec_id:
            extractName += extension
            break
188 |
189 | #make the items extract folder
190 |
def makeFolder(mediaFile):
    """Create the extraction folder for *mediaFile* if it does not exist yet.

    The folder sits next to the media file and carries the file's name without
    its extension. Its path is stored in the module-level ``extractFolder``.
    """
    global extractFolder
    parent_dir, file_name = os.path.split(mediaFile)
    stem, _extension = os.path.splitext(file_name)
    extractFolder = os.path.join(parent_dir, stem)
    os.makedirs(extractFolder, exist_ok=True)
196 |
197 | #run commands for extract all tracks available
198 |
def runTracks(mediaFile):
    """Extract every track of *mediaFile* into its extraction folder.

    Builds one ``<track id>:<output path>`` specification per track reported
    by mkvmerge and passes them all to a single ``mkvextract ... tracks``
    call, then prints whatever mkvextract wrote to stdout/stderr.
    """
    global i
    track_specs = []
    makeFolder(mediaFile)
    get_output(mediaFile)
    trackCount = len(jsonData['tracks'])
    for i in range(trackCount):
        get_tracks(mediaFile)
        processFile(mediaFile)
        extractPath = os.path.join(extractFolder, extractName)
        track_specs.append(f'{id}:{extractPath}')

    if not track_specs:
        print(f'No tracks available in "{os.path.basename(mediaFile)}"')
        return

    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'tracks', *track_specs]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print(process.stdout.decode(errors='replace'))
215 |
216 | #run commands for extract a specific track(s)
217 |
def runTrack(mediaFile):
    """Extract the tracks of *mediaFile* that the user picks by track ID.

    Lists the available tracks, reads a comma-separated list of track IDs
    from standard input (spaces around the commas are optional) and extracts
    the chosen tracks with a single ``mkvextract ... tracks`` call. Entries
    that are not a known track ID are reported and skipped.
    """
    global i
    track_specs = []
    makeFolder(mediaFile)
    viewTracks(mediaFile)

    # viewTracks has just refreshed jsonData. Map each reported track ID to
    # its position in the list, because get_tracks looks tracks up by position.
    position_by_id = {
        track.get('id'): position
        for position, track in enumerate(jsonData['tracks'])
    }

    answer = input(f'\ntrackID: ')
    for token in answer.split(','):
        token = token.strip()
        if not token:
            continue
        try:
            chosen_id = int(token)
        except ValueError:
            chosen_id = None
        if chosen_id not in position_by_id:
            print(f'Skipping unknown track ID "{token}"')
            continue
        i = position_by_id[chosen_id]
        get_tracks(mediaFile)
        processFile(mediaFile)
        extractPath = os.path.join(extractFolder, extractName)
        track_specs.append(f'{id}:{extractPath}')

    if not track_specs:
        print(f'No tracks selected for "{os.path.basename(mediaFile)}"')
        return

    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'tracks', *track_specs]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print('\n' + process.stdout.decode(errors='replace'))
235 |
236 | #run commands for extract chapters
237 |
def runChapters(mediaFile):
    """Extract the chapters of *mediaFile* in the format chosen by ``chaptersMode``.

    Reads the module-level ``chaptersMode``: ``1`` writes ``Chapters_XML.xml``
    and ``2`` writes ``Chapters_OGM.txt`` (mkvextract's ``--simple`` format).
    Any other value is reported and nothing is extracted. Files without
    chapters are reported and skipped.
    """
    get_output(mediaFile)
    if not jsonData.get('chapters'):
        print(f'No chapters available in "{os.path.basename(mediaFile)}"')
        return

    if chaptersMode == 1:
        file_name, format_options = 'Chapters_XML.xml', []
    elif chaptersMode == 2:
        file_name, format_options = 'Chapters_OGM.txt', ['--simple']
    else:
        print(f'Unknown chaptersMode "{chaptersMode}"; expected 1 or 2')
        return

    makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, file_name)
    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'chapters', *format_options, extractPath]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # mkvextract documents exit code 2 as an error (0 = success, 1 = warnings).
    if process.returncode > 1:
        print(process.stdout.decode(errors='replace'))
        return
    print(f'Extracting chapters to the file "{extractPath}".\nProgress: 100%')
252 |
253 | #run commands for extract attachments
254 |
def runAttachments(mediaFile):
    """Extract the attachments of *mediaFile* that the user picks by ID.

    Lists the available attachments, reads a comma-separated list of
    attachment IDs from standard input (spaces around the commas are
    optional) and extracts the chosen ones with a single
    ``mkvextract ... attachments`` call. Entries that are not a known
    attachment ID are reported and skipped; files without attachments are
    reported and skipped.
    """
    global i
    attachment_specs = []
    get_output(mediaFile)
    attachments = jsonData.get('attachments') or []
    if not attachments:
        print(f'No attachments available in "{os.path.basename(mediaFile)}"')
        return

    makeFolder(mediaFile)
    viewAttachments(mediaFile)

    # get_attachments looks attachments up by list position, so translate the
    # ID the user types into that position instead of assuming ID == index + 1.
    position_by_id = {
        attachment.get('id'): position
        for position, attachment in enumerate(attachments)
    }

    answer = input(f'\nattachmentID: ')
    for token in answer.split(','):
        token = token.strip()
        if not token:
            continue
        try:
            chosen_id = int(token)
        except ValueError:
            chosen_id = None
        if chosen_id not in position_by_id:
            print(f'Skipping unknown attachment ID "{token}"')
            continue
        i = position_by_id[chosen_id]
        get_attachments(mediaFile)
        # The name comes from inside the media file: keep only its last path
        # component so the output cannot land outside the extraction folder.
        safe_name = os.path.basename(attach_name or '') or f'attachment_{attach_id}'
        extractPath = os.path.join(extractFolder, safe_name)
        attachment_specs.append(f'{attach_id}:{extractPath}')

    if not attachment_specs:
        print(f'No attachments selected for "{os.path.basename(mediaFile)}"')
        return

    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'attachments', *attachment_specs]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print('\n' + process.stdout.decode(errors='replace'))
276 |
277 | #run commands for extract timestamps for all tracks
278 |
def runTimestamps(mediaFile):
    """Extract the timestamps of every track of *mediaFile*.

    Writes one ``TrackID_<id>_[<type>]_[tc].txt`` file per track into the
    extraction folder using a single mkvextract call, then prints
    mkvextract's output.
    """
    global i
    track_specs = []
    makeFolder(mediaFile)
    get_output(mediaFile)
    trackCount = len(jsonData['tracks'])
    for i in range(trackCount):
        get_tracks(mediaFile)
        extractName = f'TrackID_{id}_[{track_type}]_[tc].txt'
        extractPath = os.path.join(extractFolder, extractName)
        track_specs.append(f'{id}:{extractPath}')

    if not track_specs:
        print(f'No tracks available in "{os.path.basename(mediaFile)}"')
        return

    # NOTE(review): current MKVToolNix documentation names this mode
    # "timestamps_v2"; confirm the legacy "timecodes_v2" spelling is still
    # accepted by the mkvextract versions that must be supported.
    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'timecodes_v2', *track_specs]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print(process.stdout.decode(errors='replace'))
295 |
296 | #run commands for extract cues for all tracks
297 |
def runCues(mediaFile):
    """Extract the cues of every track of *mediaFile*.

    Writes one ``TrackID_<id>_[<type>]_[cues].txt`` file per track into the
    extraction folder using a single ``mkvextract ... cues`` call, then
    prints mkvextract's output.
    """
    global i
    track_specs = []
    makeFolder(mediaFile)
    get_output(mediaFile)
    trackCount = len(jsonData['tracks'])
    for i in range(trackCount):
        get_tracks(mediaFile)
        extractName = f'TrackID_{id}_[{track_type}]_[cues].txt'
        extractPath = os.path.join(extractFolder, extractName)
        track_specs.append(f'{id}:{extractPath}')

    if not track_specs:
        print(f'No tracks available in "{os.path.basename(mediaFile)}"')
        return

    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'cues', *track_specs]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print(process.stdout.decode(errors='replace'))
314 |
315 | #run commands for extract cue sheet
316 |
def runCueSheet(mediaFile):
    """Extract the cue sheet of *mediaFile* to ``Cue_Sheet.cue``.

    The file is written into the extraction folder of *mediaFile*; whatever
    mkvextract prints is echoed afterwards.
    """
    makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, 'Cue_Sheet.cue')
    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'cuesheet', extractPath]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    print(process.stdout.decode(errors='replace'))
323 |
324 | #run commands for extract tags
325 |
def runTags(mediaFile):
    """Extract the tags of *mediaFile* to ``Tags.xml``.

    Extraction only happens when the mkvmerge report lists global or track
    tags; otherwise a message is printed and no folder is created.
    """
    get_output(mediaFile)
    if not (jsonData.get('global_tags') or jsonData.get('track_tags')):
        print(f'No tags available in "{os.path.basename(mediaFile)}"')
        return

    # Create the folder only once we know there is something to write, so
    # tag-less inputs do not leave empty folders behind.
    makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, 'Tags.xml')
    # An argument list (no shell) keeps paths containing quotes, spaces or
    # shell metacharacters intact and cannot be abused for command injection.
    command = ['mkvextract', mediaFile, 'tags', extractPath]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    # mkvextract documents exit code 2 as an error (0 = success, 1 = warnings).
    if process.returncode > 1:
        print(process.stdout.decode(errors='replace'))
        return
    print(f'Extracting tags to the file "{extractPath}".\nProgress: 100%')
336 |
337 | #main screen for running the script
338 |
def main():
    """Interactive entry point: collect the inputs, ask for a mode, run it.

    Collects the inputs with ``inputProcess``, shows the extract-mode menu and
    applies the chosen extraction to every input in sorted order. For the
    chapters mode the module-level ``chaptersMode`` is set from a second
    prompt before extraction starts. Invalid menu answers are reported
    instead of raising, and an input that mkvmerge cannot identify is skipped
    so the remaining inputs are still processed.
    """
    global chaptersMode

    inputProcess()
    mediaList = sorted(mkvList + webmList)
    if not mediaList:
        # Nothing to work on, so there is no point in showing the menu.
        print('No MKV or WebM inputs found.')
        return

    menu = (
        '\nmkvextractor (MKVToolNix : mkvextract)'
        '\n|'
        '\n|-- 1 : Extract All Tracks'
        '\n|-- 2 : Extract Single Tracks'
        '\n|-- 3 : Extract Chapters'
        '\n|-- 4 : Extract Attachments'
        '\n|-- 5 : Extract Timestamps'
        '\n|-- 6 : Extract Cues'
        '\n|-- 7 : Extract Cue Sheet'
        '\n|-- 8 : Extract Tags'
        '\n'
        '\nextractMode: '
    )
    handlers = {
        1: runTracks,
        2: runTrack,
        3: runChapters,
        4: runAttachments,
        5: runTimestamps,
        6: runCues,
        7: runCueSheet,
        8: runTags,
    }

    try:
        extractMode = int(input(menu))
    except ValueError:
        extractMode = None

    print(' ')

    handler = handlers.get(extractMode)
    if handler is None:
        print('Invalid extractMode: enter a number between 1 and 8.')
        return

    if extractMode == 3:
        try:
            chaptersMode = int(input('1 : XML Chapters\n2 : OGM Chapters\n\nchaptersMode: '))
        except ValueError:
            chaptersMode = None
        print(' ')
        if chaptersMode not in (1, 2):
            print('Invalid chaptersMode: enter 1 or 2.')
            return

    for mediaFile in mediaList:
        try:
            handler(mediaFile)
        except subprocess.CalledProcessError:
            # Raised by the mkvmerge identification step; skip this input and
            # carry on with the rest instead of aborting the whole batch.
            print(f'Could not identify "{mediaFile}" with mkvmerge; skipping.')
383 |
384 | #run script
385 |
# Start the interactive extractor only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
388 |
--------------------------------------------------------------------------------