├── LICENSE ├── README.md └── mkvextractor.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 dropcreations 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | __mkvExtractor__ 4 | ========= 5 | 6 | This python script is to use __MKVToolNix's mkvextract__ CLI tool. 7 | You can extract content from both __MKV__ and __WebM__ containers. 8 | 9 | ## __Usage__ 10 | 11 | - Open __Terminal__ and type below command. 12 | - You can add one or more files at once. 13 | ```shell 14 | python mkvextractor.py [file_01] [file_02] [file_03]... 15 | ``` 16 | - You can also add a folder that includes MKV and WebM files. 17 | - Don't add more than one folder. 
18 | ```shell 19 | python mkvextractor.py [folder_path] 20 | ``` 21 | - You can extract: 22 | - [__All tracks__](#extract-mode--all-tracks) 23 | - [__Single tracks__](#extract-mode--single-tracks) 24 | - [__Chapters__](#extract-mode--chapters) 25 | - [__Attachments__](#extract-mode--attachments) 26 | - [__Timestamps__](#extract-mode--timestamps) 27 | - [__Cues__](#extract-mode--cues) 28 | - [__Cue Sheets__](#extract-mode--cue-sheets) 29 | - [__Tags__](#extract-mode--tags) 30 | 31 | ### __Extract Mode : All tracks__ 32 | 33 | You can extract all video, audio and subtitle tracks available in all inputs. 34 | 35 | ### __Extract Mode : Single tracks__ 36 | 37 | In this mode, the script analyzes every input and shows a list of the available tracks; you can then enter the IDs of the tracks that you want to extract. 38 | Please separate track IDs with a comma and a space
39 | - eg: `trackID: 0, 1, 2,...` 40 | 41 | ### __Extract Mode : Chapters__ 42 | 43 | You can extract chapters in both XML and OGM formats. Provide your choice when asked. 44 | 45 | ### __Extract Mode : Attachments__ 46 | 47 | In this mode, the script also analyzes every input and shows a list of the available attachments; you can then enter the IDs of the attachments that you want to extract. 48 | Please separate attachment IDs with a comma and a space
# mkvextractor.py
"""Interactive command-line front end for MKVToolNix's ``mkvextract``.

Usage::

    python mkvextractor.py [file_01] [file_02] [file_03]...
    python mkvextractor.py [folder_path]

Extract modes offered by the menu (summarised from the project README):

* All tracks    - extract every video, audio and subtitle track.
* Single tracks - list the tracks, then extract the IDs you enter.
* Chapters      - extract chapters in XML or OGM format.
* Attachments   - list the attachments, then extract the IDs you enter,
                  e.g. ``attachmentID: 1, 2, 3,...``
* Timestamps    - extract timestamps for all tracks at once.
* Cues          - extract cues for all available tracks at once.
* Cue Sheets    - extract the cue sheet of every input, if available.
* Tags          - extract the tags of every input, if available.
"""

import os
import sys
import json
import subprocess

inputCount = len(sys.argv)

mkvList = []
webmList = []

#process all inputs and get 'mkv' and 'webm' files.

def inputProcess():
    """Collect the media files named on the command line.

    Reads ``sys.argv`` when called and appends matching paths to the
    module-level ``mkvList`` and ``webmList`` (both are mutated in place).

    * no argument: prints a hint and collects nothing;
    * exactly one argument that is a folder: every ``.mkv`` / ``.webm`` file
      directly inside it is collected, with the folder joined onto the name;
    * exactly one argument that is a file: it is accepted whatever its
      extension (``.webm`` goes to ``webmList``, anything else to ``mkvList``);
    * several arguments: only those ending in ``.mkv`` / ``.webm`` are kept.

    Extensions are compared case-insensitively.
    """
    arguments = sys.argv[1:]
    if not arguments:
        print(f'Please provide inputs...')
        return

    def extension(path):
        # Lower-cased so that 'MOVIE.MKV' is recognised as well.
        return os.path.splitext(path)[1].lower()

    if len(arguments) == 1:
        target = arguments[0]
        if os.path.isdir(target):
            # os.listdir() yields bare names; join the folder back on so the
            # paths stay valid when the script is started from elsewhere.
            candidates = [
                os.path.join(target, name)
                for name in sorted(os.listdir(target))
            ]
        elif os.path.isfile(target):
            # A single explicit file is trusted even without a known extension.
            if extension(target) == '.webm':
                webmList.append(target)
            else:
                mkvList.append(target)
            return
        else:
            print(f'Input not found: "{target}"')
            return
    else:
        candidates = arguments

    for path in candidates:
        if extension(path) == '.mkv':
            mkvList.append(path)
        elif extension(path) == '.webm':
            webmList.append(path)

#get stream information in json format.

# Ordered (marker, extension) table: the first marker contained in a track's
# codec_id decides the output extension. Order matters, e.g. 'V_REAL' must be
# tested before the generic 'REAL' used for RealAudio.
_CODEC_EXTENSIONS = (
    ('AVC', '.264'),
    ('HEVC', '.hevc'),
    ('V_VP8', '.ivf'),
    ('V_VP9', '.ivf'),
    ('V_AV1', '.ivf'),
    ('V_MPEG1', '.mpg'),
    ('V_MPEG2', '.mpg'),
    ('V_REAL', '.rm'),
    ('V_THEORA', '.ogg'),
    ('V_MS/VFW/FOURCC', '.avi'),
    ('AAC', '.aac'),
    ('A_AC3', '.ac3'),
    ('A_EAC3', '.eac3'),
    ('ALAC', '.caf'),
    ('DTS', '.dts'),
    ('FLAC', '.flac'),
    ('MPEG/L2', '.mp2'),
    ('MPEG/L3', '.mp3'),
    ('OPUS', '.ogg'),
    ('PCM', '.wav'),
    ('REAL', '.ra'),
    ('TRUEHD', '.thd'),
    ('MLP', '.mlp'),
    ('TTA1', '.tta'),
    ('VORBIS', '.ogg'),
    ('WAVPACK4', '.wv'),
    ('PGS', '.sup'),
    ('ASS', '.ass'),
    ('SSA', '.ssa'),
    ('UTF8', '.srt'),
    ('ASCII', '.srt'),
    ('VOBSUB', '.sub'),
    ('S_KATE', '.ogg'),
    ('USF', '.usf'),
    ('WEBVTT', '.vtt'),
)

_MENU = (
    '\nmkvextractor (MKVToolNix : mkvextract)'
    '\n|'
    '\n|-- 1 : Extract All Tracks'
    '\n|-- 2 : Extract Single Tracks'
    '\n|-- 3 : Extract Chapters'
    '\n|-- 4 : Extract Attachments'
    '\n|-- 5 : Extract Timestamps'
    '\n|-- 6 : Extract Cues'
    '\n|-- 7 : Extract Cue Sheet'
    '\n|-- 8 : Extract Tags'
    '\n'
    '\nextractMode: '
)


def get_output(mediaFile):
    """Return mkvmerge's JSON identification of ``mediaFile`` as a dict.

    Raises:
        subprocess.CalledProcessError: mkvmerge exited with code 2 or higher.
        FileNotFoundError: the ``mkvmerge`` executable could not be started.
        ValueError: the output was not valid JSON (json.JSONDecodeError).
    """
    command = [
        'mkvmerge',
        '--identify',
        '--identification-format',
        'json',
        os.path.abspath(mediaFile),
    ]
    result = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL
    )
    # Per the MKVToolNix documentation exit code 1 only signals warnings, so
    # it must not be treated as a failure (check_output would raise on it).
    if result.returncode >= 2:
        raise subprocess.CalledProcessError(
            result.returncode, command, output=result.stdout
        )
    return json.loads(result.stdout)

#parse data from json output

def get_tracks(mediaFile):
    """Return the list of track entries mkvmerge reports for ``mediaFile``."""
    return get_output(mediaFile).get('tracks') or []

def get_attachments(mediaFile):
    """Return the list of attachment entries mkvmerge reports for ``mediaFile``."""
    return get_output(mediaFile).get('attachments') or []

#List available tracks

def viewTracks(mediaFile, tracks=None):
    """Print a summary of every track and return the track list.

    ``tracks`` lets a caller that already identified the file avoid a second
    mkvmerge run; when omitted the file is identified here.
    """
    if tracks is None:
        tracks = get_tracks(mediaFile)
    print(os.path.basename(mediaFile))
    for track in tracks:
        properties = track.get('properties') or {}
        track_id = track.get('id')
        track_type = track.get('type')
        codec = track.get('codec')
        language = properties.get('language')
        language_ietf = properties.get('language_ietf')
        title = properties.get('track_name')
        print(f'\nTrack ID : {track_id}')
        print(f' |')
        print(f' |--Type : {track_type}')
        print(f' |--Codec : {codec}')
        print(f' |--Language : {language}')
        print(f' |--Language_ietf : {language_ietf}')
        print(f' |--Title : {title}')
    return tracks

#List available attachments

def viewAttachments(mediaFile, attachments=None):
    """Print a summary of every attachment and return the attachment list.

    ``attachments`` may be passed by a caller that already identified the
    file; when omitted the file is identified here.
    """
    if attachments is None:
        attachments = get_attachments(mediaFile)
    print(os.path.basename(mediaFile))
    for attachment in attachments:
        properties = attachment.get('properties') or {}
        attach_id = attachment.get('id')
        attach_type = attachment.get('content_type')
        attach_name = attachment.get('file_name')
        attach_desc = attachment.get('description')
        attach_uid = properties.get('uid')
        print(f'\nAttachment ID : {attach_id}')
        print(f' |')
        print(f' |--ContentType : {attach_type}')
        print(f' |--Filename : {attach_name}')
        print(f' |--Description : {attach_desc}')
        print(f' |--UID : {attach_uid}')
    return attachments

#process the input file

def _extension_for(codec_id):
    """Return the file extension for ``codec_id``, or '' when it is unknown."""
    codec_id = codec_id or ''
    for marker, extension in _CODEC_EXTENSIONS:
        if marker in codec_id:
            return extension
    return ''

def _track_file_name(track):
    """Build the output file name for one mkvmerge track entry."""
    properties = track.get('properties') or {}
    track_id = track.get('id')
    track_type = track.get('type')
    language = properties.get('language')

    details = ''
    if track_type == 'video':
        pixel_dimensions = properties.get('pixel_dimensions')
        details = f'_[{pixel_dimensions}]'
    elif track_type == 'audio':
        audio_channels = properties.get('audio_channels')
        frequency = properties.get('audio_sampling_frequency')
        # A missing frequency used to raise TypeError on the division.
        rate = f'{frequency / 1000}kHz' if frequency else 'unknown'
        details = f'_[{audio_channels}CH]_[{rate}]'

    # Any other track type (subtitles, buttons, ...) gets the plain pattern,
    # so a name is always produced.
    stem = f'TrackID_{track_id}_[{track_type}]{details}_[{language}]'
    return stem + _extension_for(properties.get('codec_id'))

def processFile(mediaFile, track=0):
    """Return the output file name for one track of ``mediaFile``.

    Args:
        mediaFile: path of the container the track belongs to.
        track: either a track entry (dict) as returned by get_tracks(), in
            which case mkvmerge is not run, or the position of the track in
            the file's track list (default: the first track).
    """
    if not isinstance(track, dict):
        track = get_tracks(mediaFile)[int(track)]
    return _track_file_name(track)

#make the items extract folder

def makeFolder(mediaFile):
    """Create (if needed) and return the output folder for ``mediaFile``.

    The folder sits next to the media file and is named after it without the
    extension. If that path is already taken by a regular file (e.g. the
    input itself has no extension), '_extracted' is appended.
    """
    mediaFolder = os.path.dirname(mediaFile)
    mediaName = os.path.splitext(os.path.basename(mediaFile))[0]
    extractFolder = os.path.join(mediaFolder, mediaName)
    if os.path.isfile(extractFolder):
        extractFolder += '_extracted'
    os.makedirs(extractFolder, exist_ok=True)
    return extractFolder

#helpers shared by the extract modes

def _run_mkvextract(mediaFile, mode, *arguments):
    """Run ``mkvextract <mediaFile> <mode> <arguments...>``.

    The command is passed as an argument list (no shell), so quotes, spaces
    or '$' in file names cannot break or alter it.

    Returns:
        (exit code, combined stdout/stderr text).
    """
    command = ['mkvextract', mediaFile, mode, *arguments]
    process = subprocess.run(
        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
    )
    return process.returncode, process.stdout.decode(errors='replace')

def _extract_per_track(mediaFile, mode, tracks, name_for):
    """Run one per-track mkvextract ``mode`` for ``tracks``; return its output.

    ``name_for`` maps a track entry to the file name to write inside the
    media file's output folder.
    """
    extractFolder = makeFolder(mediaFile)
    specs = []
    for track in tracks:
        track_id = track.get('id')
        extractPath = os.path.join(extractFolder, name_for(track))
        specs.append(f'{track_id}:{extractPath}')
    _, output = _run_mkvextract(mediaFile, mode, *specs)
    return output

def _parse_ids(text):
    """Parse user input such as '0, 1,2' into a list of unique ints."""
    ids = []
    for token in text.replace(',', ' ').split():
        try:
            value = int(token)
        except ValueError:
            print(f'Ignoring invalid ID "{token}"')
            continue
        if value not in ids:
            ids.append(value)
    return ids

def _select_by_id(entries, wanted, label):
    """Return the entries whose 'id' is in ``wanted``, reporting unknown IDs."""
    by_id = {entry.get('id'): entry for entry in entries}
    chosen = []
    for number in wanted:
        if number in by_id:
            chosen.append(by_id[number])
        else:
            print(f'No {label} with ID {number}')
    return chosen

def _no_tracks(mediaFile):
    """Tell the user that ``mediaFile`` has no tracks."""
    print(f'No tracks available in "{os.path.basename(mediaFile)}"')

#run commands for extract all tracks available

def runTracks(mediaFile):
    """Extract every track of ``mediaFile`` into its output folder."""
    tracks = get_tracks(mediaFile)
    if not tracks:
        _no_tracks(mediaFile)
        return
    print(_extract_per_track(mediaFile, 'tracks', tracks, _track_file_name))

#run commands for extract a specific track(s)

def runTrack(mediaFile):
    """List the tracks, ask for track IDs and extract the chosen tracks.

    The entered numbers are matched against each track's ``id`` rather than
    being used as positions in the track list.
    """
    tracks = viewTracks(mediaFile)
    if not tracks:
        _no_tracks(mediaFile)
        return
    wanted = _parse_ids(input(f'\ntrackID: '))
    chosen = _select_by_id(tracks, wanted, 'track')
    if not chosen:
        print('No tracks selected.')
        return
    output = _extract_per_track(mediaFile, 'tracks', chosen, _track_file_name)
    print('\n' + output)

#run commands for extract chapters

def runChapters(mediaFile, mode=None):
    """Extract the chapters of ``mediaFile``.

    Args:
        mediaFile: path of the container.
        mode: 1 for XML chapters, 2 for OGM (simple) chapters. When omitted,
            the module-level ``chaptersMode`` set by main() is used.
    """
    if mode is None:
        mode = globals().get('chaptersMode')
    if mode not in (1, 2):
        print(f'Unknown chapters mode: {mode}')
        return
    if not get_output(mediaFile).get('chapters'):
        print(f'No chapters available in "{os.path.basename(mediaFile)}"')
        return

    extractFolder = makeFolder(mediaFile)
    if mode == 1:
        extractPath = os.path.join(extractFolder, 'Chapters_XML.xml')
        arguments = [extractPath]
    else:
        extractPath = os.path.join(extractFolder, 'Chapters_OGM.txt')
        arguments = ['--simple', extractPath]

    code, output = _run_mkvextract(mediaFile, 'chapters', *arguments)
    # Exit code 2 means mkvextract failed; only claim success otherwise.
    if code < 2:
        print(f'Extracting chapters to the file "{extractPath}".\nProgress: 100%')
    else:
        print(output)

#run commands for extract attachments

def runAttachments(mediaFile):
    """List the attachments, ask for IDs and extract the chosen attachments."""
    attachments = get_attachments(mediaFile)
    if not attachments:
        print(f'No attachments available in "{os.path.basename(mediaFile)}"')
        return
    viewAttachments(mediaFile, attachments)
    wanted = _parse_ids(input(f'\nattachmentID: '))
    chosen = _select_by_id(attachments, wanted, 'attachment')
    if not chosen:
        print('No attachments selected.')
        return

    extractFolder = makeFolder(mediaFile)
    specs = []
    for attachment in chosen:
        attach_id = attachment.get('id')
        # The name comes from the container itself: keep only its last path
        # component so it cannot point outside the output folder.
        stored_name = (attachment.get('file_name') or '').replace('\\', '/')
        attach_name = os.path.basename(stored_name) or f'attachment_{attach_id}'
        extractPath = os.path.join(extractFolder, attach_name)
        specs.append(f'{attach_id}:{extractPath}')
    _, output = _run_mkvextract(mediaFile, 'attachments', *specs)
    print('\n' + output)

#run commands for extract timestamps for all tracks

def runTimestamps(mediaFile):
    """Extract the timestamps of every track of ``mediaFile``."""
    tracks = get_tracks(mediaFile)
    if not tracks:
        _no_tracks(mediaFile)
        return

    def name_for(track):
        track_id = track.get('id')
        track_type = track.get('type')
        return f'TrackID_{track_id}_[{track_type}]_[tc].txt'

    # 'timestamps_v2' is the documented name of this mode; the script used
    # its legacy spelling 'timecodes_v2'.
    print(_extract_per_track(mediaFile, 'timestamps_v2', tracks, name_for))

#run commands for extract cues for all tracks

def runCues(mediaFile):
    """Extract the cues of every track of ``mediaFile``."""
    tracks = get_tracks(mediaFile)
    if not tracks:
        _no_tracks(mediaFile)
        return

    def name_for(track):
        track_id = track.get('id')
        track_type = track.get('type')
        return f'TrackID_{track_id}_[{track_type}]_[cues].txt'

    print(_extract_per_track(mediaFile, 'cues', tracks, name_for))

#run commands for extract cue sheet

def runCueSheet(mediaFile):
    """Extract the cue sheet of ``mediaFile`` and print mkvextract's output."""
    extractFolder = makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, 'Cue_Sheet.cue')
    _, output = _run_mkvextract(mediaFile, 'cuesheet', extractPath)
    print(output)

#run commands for extract tags

def runTags(mediaFile):
    """Extract the tags of ``mediaFile`` when it has global or track tags."""
    data = get_output(mediaFile)
    if not (data.get('global_tags') or data.get('track_tags')):
        print(f'No tags available in "{os.path.basename(mediaFile)}"')
        return
    extractFolder = makeFolder(mediaFile)
    extractPath = os.path.join(extractFolder, 'Tags.xml')
    code, output = _run_mkvextract(mediaFile, 'tags', extractPath)
    if code < 2:
        print(f'Extracting tags to the file "{extractPath}".\nProgress: 100%')
    else:
        print(output)

#main screen for running the script

def _ask_number(prompt):
    """Prompt the user and return the answer as an int, or None if invalid."""
    try:
        return int(input(prompt))
    except ValueError:
        return None

def main():
    """Collect the inputs, show the menu and run the chosen mode on each file."""
    global chaptersMode

    inputProcess()
    mediaList = sorted(mkvList + webmList)
    if not mediaList:
        # With no argument at all inputProcess() has already printed a hint.
        if len(sys.argv) > 1:
            print('No MKV or WebM files to process.')
        return

    actions = {
        1: runTracks,
        2: runTrack,
        3: runChapters,
        4: runAttachments,
        5: runTimestamps,
        6: runCues,
        7: runCueSheet,
        8: runTags,
    }
    extractMode = _ask_number(_MENU)
    print(' ')
    action = actions.get(extractMode)
    if action is None:
        print('Please choose a mode between 1 and 8.')
        return

    if extractMode == 3:
        chaptersMode = _ask_number(
            '1 : XML Chapters'
            '\n2 : OGM Chapters'
            '\n\nchaptersMode: '
        )
        print(' ')
        if chaptersMode not in (1, 2):
            print('Please choose chapters mode 1 or 2.')
            return

    for mediaFile in mediaList:
        try:
            action(mediaFile)
        except FileNotFoundError as error:
            # Raised by subprocess when mkvmerge/mkvextract cannot be started.
            print(f'Cannot run "{error.filename}". Is MKVToolNix installed and on PATH?')
            return
        except (subprocess.CalledProcessError, ValueError) as error:
            # One unreadable file must not abort the remaining inputs.
            print(f'Skipping "{os.path.basename(mediaFile)}": {error}')

#run script

if __name__ == "__main__":
    main()