├── LICENSE
├── README.md
└── mkvextractor.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 dropcreations
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!-- PROJECT INTRO -->
 2 | 
 3 | __mkvExtractor__
 4 | =========
 5 | 
 6 | This Python script is a wrapper around __MKVToolNix's mkvextract__ CLI tool.
 7 | You can use it to extract content from both __MKV__ and __WebM__ containers.
 8 | 
 9 | ## __Usage__
10 | 
11 | - Open a __terminal__ and run the command below.
12 | - You can add one or more files at once.
13 | ```shell
14 | python mkvextractor.py [file_01] [file_02] [file_03]...
15 | ```
16 | - You can also add a folder that includes MKV and WebM files.
17 | - Don't add more than one folder.
18 | ```shell
19 | python mkvextractor.py [folder_path]
20 | ```
21 | - You can extract,
22 |     - [__All tracks__](#extract-mode--all-tracks)
23 |     - [__Single tracks__](#extract-mode--single-tracks)
24 |     - [__Chapters__](#extract-mode--chapters)
25 |     - [__Attachments__](#extract-mode--attachments)
26 |     - [__Timestamps__](#extract-mode--timestamps)
27 |     - [__Cues__](#extract-mode--cues)
28 |     - [__Cue Sheets__](#extract-mode--cue-sheets)
29 |     - [__Tags__](#extract-mode--tags)
30 | 
31 | ### __Extract Mode : All tracks__
32 | 
33 | You can extract all video, audio and subtitle tracks available in all inputs.
34 | 
35 | ### __Extract Mode : Single tracks__
36 | 
37 | In this mode, the script analyzes every input and shows a list of the available tracks; you can then enter the IDs of the tracks that you want to extract.
38 | Please separate the track IDs with a comma and a space.<br>
39 | - eg: `trackID: 0, 1, 2,...`
40 | 
41 | ### __Extract Mode : Chapters__
42 | 
43 | You can extract chapters in either XML or OGM format. Enter your choice when asked.
44 | 
45 | ### __Extract Mode : Attachments__
46 | 
47 | In this mode, the script also analyzes every input and shows a list of the available attachments; you can then enter the IDs of the attachments that you want to extract.
48 | Please separate the attachment IDs with a comma and a space.<br>
49 | - eg: `attachmentID: 1, 2, 3,...`
50 | 
51 | ### __Extract Mode : Timestamps__
52 | 
53 | You can extract timestamps for all tracks at once.
54 | 
55 | ### __Extract Mode : Cues__
56 | 
57 | You can extract the cues for all available tracks at once.
58 | 
59 | ### __Extract Mode : Cue Sheets__
60 | 
61 | You can extract the cue sheet from every input, if one is available.
62 | 
63 | ### __Extract Mode : Tags__
64 | 
65 | You can extract the tags from every input, if any are available.
66 | 


--------------------------------------------------------------------------------
/mkvextractor.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import json
  4 | import subprocess
  5 | 
  6 | inputCount = len(sys.argv)
  7 | 
  8 | mkvList = []
  9 | webmList = []
 10 | 
 11 | #process all inputs and get 'mkv' and 'webm' files.
 12 | 
 13 | def inputProcess():
 14 |     if inputCount == 2:
 15 |         if os.path.isfile(sys.argv[1]) is False:
 16 |             for inputFile in os.listdir(sys.argv[1]):
 17 |                 if os.path.splitext(inputFile)[1] == '.mkv':
 18 |                     mkvList.append(inputFile)
 19 |                 elif os.path.splitext(inputFile)[1] == '.webm':
 20 |                     webmList.append(inputFile)
 21 |         else:
 22 |             mkvList.append(sys.argv[1])
 23 |     elif inputCount > 2:
 24 |         for inputID in range(1, inputCount):
 25 |             if os.path.splitext(sys.argv[inputID])[1] == '.mkv':
 26 |                 mkvList.append(sys.argv[inputID])
 27 |             elif os.path.splitext(sys.argv[inputID])[1] == '.webm':
 28 |                 webmList.append(sys.argv[inputID])
 29 |     else:
 30 |         print(f'Please provide inputs...')
 31 | 
 32 | #get stream information in json format.
 33 | 
 34 | def get_output(mediaFile):
 35 |     global jsonData
 36 |     mkvmerge_JSON = subprocess.check_output(
 37 |         [
 38 |             'mkvmerge',
 39 |             '--identify',
 40 |             '--identification-format',
 41 |             'json',
 42 |             os.path.abspath(mediaFile),
 43 |         ],
 44 |         stderr=subprocess.DEVNULL
 45 |     )
 46 |     jsonData = json.loads(mkvmerge_JSON)
 47 | 
 48 | #parse data from json output
 49 | 
 50 | def get_tracks(mediaFile):
 51 |     get_output(mediaFile)
 52 |     global id; id = jsonData.get('tracks')[int(i)].get('id')
 53 |     global language; language = jsonData.get('tracks')[int(i)].get('properties').get('language')
 54 |     global language_ietf; language_ietf = jsonData.get('tracks')[int(i)].get('properties').get('language_ietf')
 55 |     global title; title = jsonData.get('tracks')[int(i)].get('properties').get('track_name')
 56 |     global codec_id; codec_id = jsonData.get('tracks')[int(i)].get('properties').get('codec_id')
 57 |     global codec; codec = jsonData.get('tracks')[int(i)].get('codec')
 58 |     global track_type; track_type = jsonData.get('tracks')[int(i)].get('type')
 59 | 
 60 | def get_attachments(mediaFile):
 61 |     get_output(mediaFile)
 62 |     global attach_id; attach_id = jsonData.get('attachments')[int(i)].get('id')
 63 |     global attach_type; attach_type = jsonData.get('attachments')[int(i)].get('content_type')
 64 |     global attach_desc; attach_desc = jsonData.get('attachments')[int(i)].get('description')
 65 |     global attach_name; attach_name = jsonData.get('attachments')[int(i)].get('file_name')
 66 |     global attach_uid; attach_uid = jsonData.get('attachments')[int(i)].get('properties').get('uid')
 67 | 
 68 | #List available tracks
 69 | 
 70 | def viewTracks(mediaFile):
 71 |     global i
 72 |     get_output(mediaFile)
 73 |     trackCount = len(jsonData['tracks'])
 74 |     print(os.path.basename(mediaFile))
 75 |     for i in range(trackCount):
 76 |         get_tracks(mediaFile)
 77 |         print(f'\nTrack ID : {id}')
 78 |         print(f'  |')
 79 |         print(f'  |--Type           : {track_type}')
 80 |         print(f'  |--Codec          : {codec}')
 81 |         print(f'  |--Language       : {language}')
 82 |         print(f'  |--Language_ietf  : {language_ietf}')
 83 |         print(f'  |--Title          : {title}')
 84 | 
 85 | #List available attachments
 86 | 
 87 | def viewAttachments(mediaFile):
 88 |     global i
 89 |     get_output(mediaFile)
 90 |     attachmentCount = len(jsonData['attachments'])
 91 |     print(os.path.basename(mediaFile))
 92 |     for i in range(attachmentCount):
 93 |         get_attachments(mediaFile)
 94 |         print(f'\nAttachment ID : {attach_id}')
 95 |         print(f'  |')
 96 |         print(f'  |--ContentType    : {attach_type}')
 97 |         print(f'  |--Filename       : {attach_name}')
 98 |         print(f'  |--Description    : {attach_desc}')
 99 |         print(f'  |--UID            : {attach_uid}')
100 | 
101 | #process the input file
102 | 
# Ordered (marker, extension) pairs. The first marker found inside the codec ID
# wins, so the order must be kept (e.g. "V_REAL" has to be tried before "REAL").
_CODEC_EXTENSIONS = (
    ("AVC", ".264"),
    ("HEVC", ".hevc"),
    ("V_VP8", ".ivf"),
    ("V_VP9", ".ivf"),
    ("V_AV1", ".ivf"),
    ("V_MPEG1", ".mpg"),
    ("V_MPEG2", ".mpg"),
    ("V_REAL", ".rm"),
    ("V_THEORA", ".ogg"),
    ("V_MS/VFW/FOURCC", ".avi"),
    ("AAC", ".aac"),
    ("A_AC3", ".ac3"),
    ("A_EAC3", ".eac3"),
    ("ALAC", ".caf"),
    ("DTS", ".dts"),
    ("FLAC", ".flac"),
    ("MPEG/L2", ".mp2"),
    ("MPEG/L3", ".mp3"),
    ("OPUS", ".ogg"),
    ("PCM", ".wav"),
    ("REAL", ".ra"),
    ("TRUEHD", ".thd"),
    ("MLP", ".mlp"),
    ("TTA1", ".tta"),
    ("VORBIS", ".ogg"),
    ("WAVPACK4", ".wv"),
    ("PGS", ".sup"),
    ("ASS", ".ass"),
    ("SSA", ".ssa"),
    ("UTF8", ".srt"),
    ("ASCII", ".srt"),
    ("VOBSUB", ".sub"),
    ("S_KATE", ".ogg"),
    ("USF", ".usf"),
    ("WEBVTT", ".vtt"),
)


def processFile(mediaFile):
    """Build the output file name of the current track into ``extractName``.

    Reads the per-track globals (``i``, ``id``, ``track_type``, ``language``,
    ``codec_id``) and the cached ``jsonData`` report, all of which are set by
    the get_tracks() call that the callers in this file make right before
    calling this function. The result is stored in the module-level
    ``extractName``; nothing is returned.

    mediaFile is kept for interface compatibility but is not read: the
    report cached by get_tracks() is reused instead of running mkvmerge
    again for every track.

    A track type other than video/audio/subtitles gets a generic name, so a
    stale name from a previous track is never reused. A codec ID without a
    known marker (or a missing codec ID) leaves the name without extension.
    """
    global extractName

    if track_type == 'video':
        pixel_dimensions = jsonData.get('tracks')[int(i)].get('properties').get('pixel_dimensions')
        extractName = f'TrackID_{id}_[{track_type}]_[{pixel_dimensions}]_[{language}]'
    elif track_type == 'audio':
        properties = jsonData.get('tracks')[int(i)].get('properties')
        audio_channels = properties.get('audio_channels')
        audio_sampling_frequency = properties.get('audio_sampling_frequency')
        extractName = f'TrackID_{id}_[{track_type}]_[{audio_channels}CH]_[{audio_sampling_frequency / 1000}kHz]_[{language}]'
    else:
        # Subtitles and any other track type share the plain pattern.
        extractName = f'TrackID_{id}_[{track_type}]_[{language}]'

    codecText = codec_id or ''
    extractName += next(
        (extension for marker, extension in _CODEC_EXTENSIONS if marker in codecText),
        '',
    )
188 | 
189 | #make the items extract folder
190 | 
def makeFolder(mediaFile):
    """Create the extraction folder that sits next to *mediaFile*.

    The folder is named after the media file without its extension and its
    path is published in the module-level ``extractFolder``. An already
    existing folder is reused.
    """
    global extractFolder
    parentDir, fileName = os.path.split(mediaFile)
    stem, _extension = os.path.splitext(fileName)
    extractFolder = os.path.join(parentDir, stem)
    os.makedirs(extractFolder, exist_ok=True)
196 | 
197 | #run commands for extract all tracks available
198 | 
def runTracks(mediaFile):
    """Extract every track of *mediaFile* into its extraction folder.

    For each track the output name is computed by processFile() and passed
    to ``mkvextract ... tracks`` as an ``ID:path`` specification. The tool's
    combined stdout/stderr is printed afterwards.
    """
    global i
    makeFolder(mediaFile)
    get_output(mediaFile)

    trackSpecs = []
    for i in range(len(jsonData['tracks'])):
        get_tracks(mediaFile)
        processFile(mediaFile)
        trackSpecs.append(f'{id}:{os.path.join(extractFolder, extractName)}')

    if not trackSpecs:
        print(f'No tracks available in "{os.path.basename(mediaFile)}"')
        return

    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'tracks', *trackSpecs],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    # errors='replace' keeps undecodable tool output from raising.
    print(process.stdout.decode(errors='replace'))
215 | 
216 | #run commands for extract a specific track(s)
217 | 
def runTrack(mediaFile):
    """Extract the tracks of *mediaFile* that the user picks interactively.

    The tracks are listed with viewTracks(), then the user enters track IDs
    separated by commas (surrounding spaces are optional). Each entered ID
    is matched against the ``id`` field of the listed tracks; unknown IDs
    are reported and skipped. The tool's output is printed afterwards.
    """
    global i
    makeFolder(mediaFile)
    viewTracks(mediaFile)

    # viewTracks() has just refreshed jsonData. Map the displayed track ID to
    # its position in the list, because get_tracks() selects by position.
    positionById = {
        str(track.get('id')): position
        for position, track in enumerate(jsonData['tracks'])
    }

    answer = input(f'\ntrackID: ')
    trackSpecs = []
    for token in (part.strip() for part in answer.split(',')):
        if not token:
            continue
        if token not in positionById:
            print(f'Track ID "{token}" is not available in "{os.path.basename(mediaFile)}"')
            continue
        i = positionById[token]
        get_tracks(mediaFile)
        processFile(mediaFile)
        trackSpecs.append(f'{id}:{os.path.join(extractFolder, extractName)}')

    if not trackSpecs:
        print('\nNo valid track ID entered, nothing to extract.')
        return

    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'tracks', *trackSpecs],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print('\n' + process.stdout.decode(errors='replace'))
235 | 
236 | #run commands for extract chapters
237 | 
def runChapters(mediaFile):
    """Extract the chapters of *mediaFile* in the format chosen in main().

    Reads the module-level ``chaptersMode``: 1 writes ``Chapters_XML.xml``,
    2 writes ``Chapters_OGM.txt`` (mkvextract's ``--simple`` format). Files
    without chapters are reported and skipped without creating a folder.
    """
    get_output(mediaFile)
    if not jsonData.get('chapters'):
        print(f'No chapters available in "{os.path.basename(mediaFile)}"')
        return

    if chaptersMode == 1:
        fileName, formatArgs = 'Chapters_XML.xml', []
    elif chaptersMode == 2:
        fileName, formatArgs = 'Chapters_OGM.txt', ['--simple']
    else:
        # Previously this fell through and crashed on an unbound `command`.
        print(f'Unknown chaptersMode: {chaptersMode}')
        return

    makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, fileName)
    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'chapters', *formatArgs, extractPath],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if process.returncode == 0:
        print(f'Extracting chapters to the file "{extractPath}".\nProgress: 100%')
    else:
        # Show what the tool said instead of claiming success.
        print(process.stdout.decode(errors='replace'))
252 | 
253 | #run commands for extract attachments
254 | 
def runAttachments(mediaFile):
    """Extract the attachments of *mediaFile* that the user picks.

    The attachments are listed with viewAttachments(), then the user enters
    attachment IDs separated by commas (surrounding spaces are optional).
    Each entered ID is matched against the ``id`` field of the listed
    attachments; unknown IDs are reported and skipped. Files without
    attachments are reported and skipped without creating a folder.
    """
    global i
    get_output(mediaFile)
    if not jsonData.get('attachments'):
        print(f'No attachments available in "{os.path.basename(mediaFile)}"')
        return

    makeFolder(mediaFile)
    viewAttachments(mediaFile)

    # Map the displayed attachment ID to its position in the list, because
    # get_attachments() selects by position.
    positionById = {
        str(entry.get('id')): position
        for position, entry in enumerate(jsonData['attachments'])
    }

    answer = input(f'\nattachmentID: ')
    attachmentSpecs = []
    for token in (part.strip() for part in answer.split(',')):
        if not token:
            continue
        if token not in positionById:
            print(f'Attachment ID "{token}" is not available in "{os.path.basename(mediaFile)}"')
            continue
        i = positionById[token]
        get_attachments(mediaFile)
        # The name comes from inside the media file: keep only its last path
        # component so it cannot point outside the extraction folder.
        safeName = os.path.basename(attach_name or '')
        if safeName in ('', '.', '..'):
            safeName = f'Attachment_{attach_id}'
        attachmentSpecs.append(f'{attach_id}:{os.path.join(extractFolder, safeName)}')

    if not attachmentSpecs:
        print('\nNo valid attachment ID entered, nothing to extract.')
        return

    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'attachments', *attachmentSpecs],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print('\n' + process.stdout.decode(errors='replace'))
276 | 
277 | #run commands for extract timestamps for all tracks
278 | 
def runTimestamps(mediaFile):
    """Extract the timestamps of every track of *mediaFile*.

    Each track gets a ``TrackID_<id>_[<type>]_[tc].txt`` file in the
    extraction folder, requested through mkvextract's ``timecodes_v2`` mode.
    The tool's combined stdout/stderr is printed afterwards.
    """
    global i
    makeFolder(mediaFile)
    get_output(mediaFile)

    timestampSpecs = []
    for i in range(len(jsonData['tracks'])):
        get_tracks(mediaFile)
        outputName = f'TrackID_{id}_[{track_type}]_[tc].txt'
        timestampSpecs.append(f'{id}:{os.path.join(extractFolder, outputName)}')

    if not timestampSpecs:
        print(f'No tracks available in "{os.path.basename(mediaFile)}"')
        return

    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'timecodes_v2', *timestampSpecs],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(process.stdout.decode(errors='replace'))
295 | 
296 | #run commands for extract cues for all tracks
297 | 
def runCues(mediaFile):
    """Extract the cues of every track of *mediaFile*.

    Each track gets a ``TrackID_<id>_[<type>]_[cues].txt`` file in the
    extraction folder, requested through mkvextract's ``cues`` mode. The
    tool's combined stdout/stderr is printed afterwards.
    """
    global i
    makeFolder(mediaFile)
    get_output(mediaFile)

    cueSpecs = []
    for i in range(len(jsonData['tracks'])):
        get_tracks(mediaFile)
        outputName = f'TrackID_{id}_[{track_type}]_[cues].txt'
        cueSpecs.append(f'{id}:{os.path.join(extractFolder, outputName)}')

    if not cueSpecs:
        print(f'No tracks available in "{os.path.basename(mediaFile)}"')
        return

    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'cues', *cueSpecs],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(process.stdout.decode(errors='replace'))
314 | 
315 | #run commands for extract cue sheet
316 | 
def runCueSheet(mediaFile):
    """Extract the cue sheet of *mediaFile* to ``Cue_Sheet.cue``.

    The file is written into the extraction folder through mkvextract's
    ``cuesheet`` mode; the tool's combined stdout/stderr is printed.
    """
    makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, 'Cue_Sheet.cue')
    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'cuesheet', extractPath],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    print(process.stdout.decode(errors='replace'))
323 | 
324 | #run commands for extract tags
325 | 
def runTags(mediaFile):
    """Extract the tags of *mediaFile* to ``Tags.xml``.

    Extraction runs when the mkvmerge report lists any ``global_tags`` or
    ``track_tags``. Files without tags are reported and skipped without
    creating a folder, matching how runChapters() treats missing chapters.
    """
    get_output(mediaFile)
    if not (jsonData.get('global_tags') or jsonData.get('track_tags')):
        print(f'No tags available in "{os.path.basename(mediaFile)}"')
        return

    makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, 'Tags.xml')
    # Argument list instead of a shell string: file names containing quotes,
    # '$' or backticks can no longer break or alter the command.
    process = subprocess.run(
        ['mkvextract', mediaFile, 'tags', extractPath],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    if process.returncode == 0:
        print(f'Extracting tags to the file "{extractPath}".\nProgress: 100%')
    else:
        # Show what the tool said instead of claiming success.
        print(process.stdout.decode(errors='replace'))
336 | 
337 | #main screen for running the script
338 | 
def main():
    """Collect the inputs, ask for an extract mode and run it on every file.

    Returns early, with a message, when no input file was queued or when the
    entered mode (or chapter format) is not a valid number. For mode 3 the
    chosen chapter format is published in the module-level ``chaptersMode``
    that runChapters() reads.
    """
    global chaptersMode

    inputProcess()
    mediaList = sorted(mkvList + webmList)
    if not mediaList:
        # inputProcess() reports the "no arguments at all" case itself.
        if len(sys.argv) > 1:
            print('No MKV or WebM files found in the given inputs.')
        return

    # NOTE: the indentation of the continuation lines is inside the string
    # literal and therefore part of the prompt; keep it unchanged.
    menuPrompt = (
        f'\nmkvextractor (MKVToolNix : mkvextract)\
        \n|\
        \n|-- 1 : Extract All Tracks\
        \n|-- 2 : Extract Single Tracks\
        \n|-- 3 : Extract Chapters\
        \n|-- 4 : Extract Attachments\
        \n|-- 5 : Extract Timestamps\
        \n|-- 6 : Extract Cues\
        \n|-- 7 : Extract Cue Sheet\
        \n|-- 8 : Extract Tags\
        \n\
        \nextractMode: '
    )

    # Menu number -> function that handles one media file in that mode.
    handlers = {
        1: runTracks,
        2: runTrack,
        3: runChapters,
        4: runAttachments,
        5: runTimestamps,
        6: runCues,
        7: runCueSheet,
        8: runTags,
    }

    try:
        extractMode = int(input(menuPrompt))
    except ValueError:
        print('\nInvalid extractMode: please enter a number from 1 to 8.')
        return

    print(' ')

    handler = handlers.get(extractMode)
    if handler is None:
        print('Invalid extractMode: please enter a number from 1 to 8.')
        return

    if extractMode == 3:
        # Same note as above: the leading whitespace belongs to the prompt.
        chaptersPrompt = (
            f'1 : XML Chapters\
            \n2 : OGM Chapters\
            \n\nchaptersMode: '
        )
        try:
            chaptersMode = int(input(chaptersPrompt))
        except ValueError:
            chaptersMode = None
        if chaptersMode not in (1, 2):
            print('\nInvalid chaptersMode: please enter 1 or 2.')
            return
        print(' ')

    for file in mediaList:
        handler(file)
383 | 
384 | #run script
385 | 
# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
388 | 


--------------------------------------------------------------------------------