├── .gitignore ├── LICENSE ├── README.md └── index.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.mkv 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Leko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ytarchive-raw 2 | 3 | ## Description 4 | 5 | This project introduces a new method to grab **Privated**, **Removed** or **any** unavailable YouTube livestreams with prepared metadata files. 6 | 7 | Use with [Auto YTA](https://github.com/lekoOwO/auto-ytarchive-raw) prevent any missing livestreams! 
8 | 9 | ## Dependencies 10 | 11 | - ffmpeg 12 | 13 | - python3 > 3.4 14 | 15 | ## Usage 16 | 17 | Get freg json file using the [bookmark script](https://gist.github.com/lekoOwO/c90c09409446e6c7663c489bf06dc649). 18 | 19 | And do `python index.py -i FREG_JSON_FILE` 20 | 21 | TADA! 22 | 23 | See full parameter lists by `-h`: 24 | 25 | ``` 26 | Parameters: 27 | -i, --input [JSON_FILE] Input JSON file. Do not use with -iv or -ia. 28 | -iv, --input-video [URL] Input video URL. Use with -ia. 29 | -ia, --input-audio [URL] Input audio URL. Use with -iv. 30 | 31 | -o, --output [OUTPUT_FILE] Output file path. Uses `YYYYMMDD TITLE (VIDEO_ID).mkv` by default. 32 | -s5, --socks5-proxy [proxy] Socks5 Proxy. No schema should be provided in the proxy url. PySocks should be installed. 33 | -hp, --http-proxy [proxy] HTTP Proxy. 34 | -t, --threads [INT] Multi-thread download, experimental. 35 | -T, --timeout [INT] Secs for retrying when encounter HTTP errors. Default 20. 36 | -p, --pool [FILE] IP Pool file. 37 | -d, --temp-dir [DIR] Temp file dir. 38 | -v, --verbose Enable debug mode. 
"""This project introduces a new method to grab Privated,
Removed or any unavailable YouTube livestreams with prepared metadata files."""
import argparse
import functools
import http.client
import ipaddress
import itertools
import json
import logging
import os
import pathlib
import random
import re
import shutil
import socket
import subprocess
import sys
import tempfile
import threading
import time
import traceback
import urllib.error
import urllib.request
from argparse import Namespace
from datetime import date
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

# Tunables; several are overwritten from CLI options in the __main__ block.
FAIL_THRESHOLD = 20  # consecutive failures before a segment is given up
RETRY_THRESHOLD = 3  # per-request retry limit inside openurl()
SLEEP_AFTER_FETCH_FREG = 0
DEBUG = False
THREADS = 1  # downloader threads per stream
IP_POOL = None  # optional path to a file of source IPs, one per line
HTTP_TIMEOUT = 5  # socket timeout, seconds

BASE_DIR = None  # temp working directory, set up in the __main__ block

# Progress-bar rendering constants.
PBAR_LEN = 80
PBAR_SYMBOL = "█"
PBAR_EMPTY_SYMBOL = "-"
PBAR_PRINT_INTERVAL = 5

# Transliteration table used by sanitize_filename(restricted=True).
ACCENT_CHARS = dict(
    zip(
        "ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ",
        itertools.chain(
            "AAAAAA",
            ["AE"],
            "CEEEEIIIIDNOOOOOOO",
            ["OE"],
            "UUUUUY",
            ["TH", "ss"],
            "aaaaaa",
            ["ae"],
            "ceeeeiiiionooooooo",
            ["oe"],
            "uuuuuy",
            ["th"],
            "y",
        ),
    )
)

socket.setdefaulttimeout(HTTP_TIMEOUT)


# ===== utils =====
def sanitize_filename(substitution, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.

    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """

    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == "?" or ord(char) < 32 or ord(char) == 127:
            return ""
        if char == '"':
            return "" if restricted else "'"
        if char == ":":
            return "_-" if restricted else " -"
        if char in "\\/|*<>":
            return "_"
        if restricted and (char in "!&'()[]{}$;`^,#" or char.isspace()):
            return "_"
        if restricted and ord(char) > 127:
            return "_"
        return char

    # Handle timestamps: "1:23:45" -> "1_23_45" before ":" is replaced below.
    substitution = re.sub(
        r"[0-9]+(?::[0-9]+)+", lambda m: m.group(0).replace(":", "_"), substitution
    )
    result = "".join(map(replace_insane, substitution))
    if not is_id:
        while "__" in result:
            result = result.replace("__", "_")
        result = result.strip("_")
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith("-_"):
            result = result[2:]
        if result.startswith("-"):
            result = "_" + result[len("-"):]
        result = result.lstrip(".")
        if not result:
            result = "_"
    return result


# ===== utils end =====

##### Beautiful stuff #####
# ANSI color escape codes for terminal output.
bcolors = Namespace(
    HEADER="\033[95m",
    OKBLUE="\033[94m",
    OKCYAN="\033[96m",
    OKGREEN="\033[92m",
    WARNING="\033[93m",
    FAIL="\033[91m",
    ENDC="\033[0m",
    BOLD="\033[1m",
    UNDERLINE="\033[4m",
)


# Custom formatter https://stackoverflow.com/questions/1343227/
class Formatter(logging.Formatter):
    """Logging formatter that selects a format string per log level."""

    # FIX: err_fmt was two accidentally concatenated literals, which
    # duplicated the message as "...[ERROR] %(msg)s<reset>ERROR: %(msg)s".
    err_fmt = f"{bcolors.FAIL}[ERROR] %(msg)s{bcolors.ENDC}"
    dbg_fmt = "[DEBUG] %(msg)s"
    info_fmt = "[INFO] %(msg)s"
    warn_fmt = f"{bcolors.WARNING}[WARN] %(msg)s{bcolors.ENDC}"

    def __init__(self, fmt="%(levelno)s: %(msg)s"):
        logging.Formatter.__init__(self, fmt)

    def format(self, record):
        # FIX: on Python 3 the active format string lives on self._style,
        # so mutating self._fmt alone had no effect on the output.
        format_orig = self._style._fmt

        # Replace the original format with one customized by logging level
        if record.levelno == logging.DEBUG:
            self._style._fmt = self.dbg_fmt
        elif record.levelno == logging.INFO:
            self._style._fmt = self.info_fmt
        elif record.levelno == logging.ERROR:
            self._style._fmt = self.err_fmt
        elif record.levelno == logging.WARN:
            self._style._fmt = self.warn_fmt

        # Call the original formatter class to do the grunt work
        result = logging.Formatter.format(self, record)

        # Restore the original format configured by the user
        self._style._fmt = format_orig

        return result


logger = logging.getLogger(__name__)
formatter = Formatter()
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)


class ProgressBar:
    """Text progress bar tracking PBAR_LEN evenly spaced milestone segments."""

    def __init__(self, total, print_func=print):
        self.total = total
        self.progress = []  # [milestone_value, reached?] per bar cell
        self.progress_index = {}  # milestone value -> bar cell index
        self.print = print_func
        self.finished = 0  # count of milestones reached so far

        # NOTE(review): with total < PBAR_LEN the milestone step is 0 and
        # cells collide in progress_index — presumably totals are large;
        # preserved as-is to avoid changing display behavior.
        for i in range(PBAR_LEN):
            x = int(total / PBAR_LEN) * (i + 1)
            self.progress.append([x, False])
            self.progress_index[x] = i

    def done(self, index):
        """Mark segment *index* finished; reprint the bar every few milestones."""
        if index in self.progress_index:
            self.progress[self.progress_index[index]][1] = True
            self.finished += 1
            if not self.finished % PBAR_PRINT_INTERVAL or self.finished == self.total:
                self.print_progress()

    def print_progress(self):
        bar_str = ""
        for x in self.progress:
            bar_str += PBAR_SYMBOL if x[1] else PBAR_EMPTY_SYMBOL
        self.print(bar_str, self.finished / self.total)


##### - Beautiful stuff - #####
opener = None  # global urllib opener installed by set_http_proxy()
def set_http_proxy(proxy):
    """Install *proxy* ("host:port", no scheme) as the global HTTP(S) proxy.

    Later requests made via openurl() use the resulting opener.
    """
    global opener

    handler = urllib.request.ProxyHandler(
        {"http": f"http://{proxy}", "https": f"http://{proxy}"}
    )
    opener = urllib.request.build_opener(handler)


def set_socks5_proxy(host, port):
    """Route every socket through a SOCKS5 proxy (requires PySocks)."""
    import socks

    # FIX: the original passed the undefined name `proxy` instead of `host`,
    # so -s/--socks5-proxy always crashed with NameError.
    socks.set_default_proxy(socks.SOCKS5, host, port)
    socket.socket = socks.socksocket


def get_seg_url(url, seg):
    """Return *url* with its `sq` query parameter set to segment number *seg*."""
    parsed_url = urlsplit(url)
    qs = parse_qs(parsed_url.query)

    qs["sq"] = str(seg)

    parsed_url = list(parsed_url)
    parsed_url[3] = urlencode(qs, doseq=True)

    return urlunsplit(parsed_url)


def get_total_segment(url):
    """Fetch segment 0 and read the stream's newest segment number.

    The `x-head-seqnum` response header is present even on HTTP error
    responses, so those headers are inspected too.
    """
    seg_url = get_seg_url(url, 0)
    headers = None
    try:
        with urllib.request.urlopen(seg_url) as f:
            headers = f.headers
    except urllib.error.HTTPError as e:
        headers = e.headers
    return int(headers["x-head-seqnum"])


class SegmentStatus:
    """Bookkeeping for one stream's segments.

    Attributes:
        segs: seg number -> temp file path, or None when the seg was given up.
        merged_seg: highest segment already appended to the output file.
        end_seg: last segment number to download.
        seg_groups: inclusive (start, end) ranges, one per download thread.
    """

    def __init__(self, url, log_prefix=""):
        self.segs = {}
        self.merged_seg = -1

        logger.info(f"{log_prefix} Try getting total segments...")
        self.end_seg = get_total_segment(url)
        logger.info(f"{log_prefix} Total segments: {self.end_seg}")

        self.seg_groups = []

        # Split [0, end_seg] into THREADS contiguous, roughly equal ranges.
        last_seg = -1
        interval = int(self.end_seg / THREADS)
        while True:
            if last_seg + 1 + interval < self.end_seg:
                self.seg_groups.append((last_seg + 1, last_seg + 1 + interval))
                last_seg = last_seg + 1 + interval
            else:
                self.seg_groups.append((last_seg + 1, self.end_seg))
                break


## IP Pool
class BoundHTTPHandler(urllib.request.HTTPHandler):
    """HTTPHandler whose connections bind to a specific local source address."""

    def __init__(self, *args, source_address=None, **kwargs):
        urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
        self.http_class = functools.partial(
            http.client.HTTPConnection,
            source_address=source_address,
            timeout=HTTP_TIMEOUT,
        )

    def http_open(self, req):
        return self.do_open(self.http_class, req)
http_open(self, req): 275 | return self.do_open(self.http_class, req) 276 | 277 | 278 | class BoundHTTPSHandler(urllib.request.HTTPSHandler): 279 | def __init__(self, *args, source_address=None, **kwargs): 280 | urllib.request.HTTPSHandler.__init__(self, *args, **kwargs) 281 | self.https_class = functools.partial( 282 | http.client.HTTPSConnection, 283 | source_address=source_address, 284 | timeout=HTTP_TIMEOUT, 285 | ) 286 | 287 | def https_open(self, req): 288 | return self.do_open( 289 | self.https_class, 290 | req, 291 | context=self._context, 292 | check_hostname=self._check_hostname, 293 | ) 294 | 295 | 296 | def get_random_line(filepath: str) -> str: 297 | file_size = os.path.getsize(filepath) 298 | with open(filepath, "rb") as f: 299 | while True: 300 | pos = random.randint(0, file_size) 301 | if not pos: # the first line is chosen 302 | return f.readline().decode() # return str 303 | f.seek(pos) # seek to random position 304 | f.readline() # skip possibly incomplete line 305 | line = f.readline() # read next (full) line 306 | if line: 307 | return line.decode() 308 | # else: line is empty -> EOF -> try another position in next iteration 309 | 310 | 311 | def is_ip(ip): 312 | try: 313 | ip = ipaddress.ip_address(ip) 314 | return True 315 | except ValueError: 316 | return False 317 | 318 | 319 | def get_pool_ip(): 320 | if IP_POOL: 321 | if os.path.isfile(IP_POOL): 322 | for _ in range(3): 323 | ip = get_random_line(IP_POOL).rstrip().lstrip() 324 | if is_ip(ip): 325 | return ip 326 | return None 327 | 328 | 329 | ## IP Pool end 330 | 331 | 332 | def readfile(filepath, encoding="utf-8"): 333 | try: 334 | with open(filepath, "r", encoding=encoding) as f: 335 | return f.read() 336 | except: 337 | return "" 338 | 339 | 340 | def openurl(url, retry=0, source_address="random"): 341 | global opener 342 | 343 | def error_handle(e): 344 | if retry >= RETRY_THRESHOLD: 345 | raise e 346 | return openurl(url, retry + 1, source_address) 347 | 348 | try: 349 | if opener: 
350 | return opener.open(url) 351 | if source_address == "random": 352 | source_address = get_pool_ip() 353 | if not is_ip(source_address): 354 | source_address = None 355 | if source_address: 356 | logger.debug(f"Using IP: {source_address}") 357 | if isinstance(url, str): 358 | schema = urllib.parse.urlsplit(url).scheme 359 | elif isinstance(url, urllib.request.Request): 360 | schema = urllib.parse.urlsplit(url.full_url).scheme 361 | 362 | handler = (BoundHTTPHandler if schema == "http" else BoundHTTPSHandler)( 363 | source_address=(source_address, 0) 364 | ) 365 | return urllib.request.build_opener(handler).open(url) 366 | return urllib.request.urlopen(url) 367 | except (http.client.IncompleteRead, socket.timeout) as e: 368 | error_handle(e) 369 | except urllib.error.HTTPError as e: 370 | raise e 371 | except urllib.error.URLError as e: 372 | error_handle(e) 373 | except Exception as e: 374 | error_handle(e) 375 | 376 | 377 | def download_segment(base_url, seg, seg_status, log_prefix=""): 378 | target_url = get_seg_url(base_url, seg) 379 | 380 | target_url_with_header = urllib.request.Request( 381 | target_url, 382 | headers={ 383 | "User-Agent": ( 384 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " 385 | "(KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36" 386 | ) 387 | }, 388 | ) 389 | 390 | try: 391 | with openurl(target_url_with_header) as response: 392 | with tempfile.NamedTemporaryFile( 393 | delete=False, 394 | prefix="ytarchive_raw.", 395 | suffix=f".{seg}.seg", 396 | dir=BASE_DIR, 397 | ) as tmp_file: 398 | shutil.copyfileobj(response, tmp_file) 399 | seg_status.segs[seg] = tmp_file.name 400 | return True 401 | 402 | except urllib.error.HTTPError as e: 403 | logger.debug(f"{log_prefix} Seg {seg} Failed with {e.code}") 404 | if e.code == 403: 405 | try: 406 | openurl(base_url) 407 | except urllib.error.HTTPError: 408 | return False 409 | return False 410 | 411 | except (http.client.IncompleteRead, socket.timeout): 412 | return False 
def merge_segs(target_file, seg_status, not_merged_segs=None, log_prefix=""):
    """Append downloaded segments to *target_file* in strict order.

    Loops until every segment up to seg_status.end_seg is handled, waiting
    for missing ones. Segments recorded as None (given up by the
    downloader) are skipped and their numbers appended to *not_merged_segs*.
    """
    # FIX: avoid the shared mutable default argument of the original.
    if not_merged_segs is None:
        not_merged_segs = []

    while seg_status.merged_seg != seg_status.end_seg:
        next_seg = seg_status.merged_seg + 1
        if next_seg not in seg_status.segs:
            logger.debug(
                f"{log_prefix} Waiting for Segment {next_seg} ready for merging..."
            )
            time.sleep(1)
            continue

        if seg_status.segs[next_seg] is not None:
            mode = "ab" if os.path.exists(target_file) else "wb"
            with open(target_file, mode) as target:
                with open(seg_status.segs[next_seg], "rb") as source:
                    shutil.copyfileobj(source, target)

            try:
                os.remove(seg_status.segs[next_seg])
            except OSError:  # best-effort cleanup of the temp segment
                pass
        else:
            not_merged_segs.append(next_seg)

        seg_status.merged_seg = next_seg
        seg_status.segs.pop(next_seg)


def download_seg_group(
    url, seg_group_index, seg_status, log_prefix="", post_dl_seg=lambda x: True
):
    """Download every segment in one (start, end) group; runs in a thread.

    Each segment is retried until FAIL_THRESHOLD consecutive failures, then
    marked as given up (seg_status.segs[seg] = None). *post_dl_seg* is
    called with each finished or skipped segment number.
    """
    seg_range = seg_status.seg_groups[seg_group_index]
    seg = seg_range[0]
    fail_count = 0

    try:
        while True:
            if fail_count < FAIL_THRESHOLD:
                logger.debug(f"{log_prefix} Current Seg: {seg}")

                status = download_segment(url, seg, seg_status, log_prefix)

                if status:
                    logger.debug(f"{log_prefix} Success Seg: {seg}")
                    post_dl_seg(seg)
                    if seg == seg_range[1]:
                        return True
                    seg += 1
                    fail_count = 0
                else:
                    fail_count += 1
                    logger.debug(
                        f"{log_prefix} Failed Seg: {seg} [{fail_count}/{FAIL_THRESHOLD}]"
                    )
                    time.sleep(1)
            else:
                # logger.warn() is a deprecated alias of warning().
                logger.warning(f"{log_prefix} Giving up seg: {seg}")
                seg_status.segs[seg] = None  # Skip this seg
                post_dl_seg(seg)
                if seg == seg_range[1]:
                    return True
                seg += 1
                fail_count = 0

    except Exception:
        traceback.print_exc()
        sys.exit(1)
def get_args():
    """Build the CLI parser and parse sys.argv.

    Boolean flags (-v, -k) are added separately; every other option is
    declared in a table so the definitions stay uniform.
    """
    parser = argparse.ArgumentParser(description="")
    arg_dict = {
        "input": {
            "switch": ["-i", "--input"],
            "help": "Input JSON file.",
            "type": str,
        },
        "output": {
            "switch": ["-o", "--output"],
            "help": "Output file path. Uses `YYYYMMDD TITLE (VIDEO_ID).mkv` by default.",
            "type": str,
        },
        "socks": {
            "switch": ["-s", "--socks5-proxy"],
            "help": (
                "Socks5 Proxy. "
                "No schema should be provided in the proxy url. "
                "PySocks should be installed."
            ),
            "type": str,
        },
        "http-proxy": {
            "switch": ["-P", "--http-proxy"],
            "help": "HTTP Proxy",
            "type": str,
        },
        "threads": {
            "switch": ["-t", "--threads"],
            "help": "Multi-threaded download",
            "type": int,
        },
        "pool": {
            "switch": ["-p", "--pool"],
            "help": "IP Pool file.",
            "type": str,
        },
        "temp-dir": {
            "switch": ["-d", "--temp-dir"],
            "help": "Directory containing the temporary files",
            "type": str,
        },
        "timeout": {
            "switch": ["-T", "--timeout"],
            "help": "Secs for retrying when encounter HTTP errors. Default 20.",
            "type": int,
        },
    }
    for arg in arg_dict:
        parser.add_argument(
            *arg_dict[arg]["switch"],
            help=arg_dict[arg]["help"],
            type=arg_dict[arg]["type"],
            default=None,
        )
    parser.add_argument(
        "-v", "--verbose", help="Enable debug mode", action="store_true"
    )
    parser.add_argument(
        "-k", "--keep-files", help="Do not delete temporary files", action="store_true"
    )
    return parser.parse_args()


def main(url, target_file, not_merged_segs=None, log_prefix="", print_func=print):
    """Download one stream URL into *target_file*.

    Spawns one downloader thread per segment group plus a merger thread and
    returns once the merger has handled every segment. Numbers of given-up
    segments are appended to *not_merged_segs*.
    """
    if not_merged_segs is None:  # FIX: avoid a shared mutable default
        not_merged_segs = []
    seg_status = SegmentStatus(url, log_prefix)
    pbar = ProgressBar(
        seg_status.end_seg,
        lambda bar, p: print_func(f"{log_prefix}: |{bar}| {'{:.2f}'.format(p*100)}%"),
    )

    merge_thread = threading.Thread(
        target=merge_segs,
        args=(target_file, seg_status, not_merged_segs, log_prefix),
        daemon=True,
    )
    merge_thread.start()

    for i in range(len(seg_status.seg_groups)):
        threading.Thread(
            target=download_seg_group,
            args=(url, i, seg_status, log_prefix, lambda x: pbar.done(x)),
            daemon=True,
        ).start()

    merge_thread.join()  # Wait for merge finished


if __name__ == "__main__":
    os.system("")  # enable ANSI colors on Windows

    # FIX: param is defined before the try block so the finally-cleanup
    # can't hit a NameError when argument parsing fails.
    param = {"output": None, "iv": [], "ia": [], "delete_tmp": True}
    input_data = None

    try:
        # Parse params
        args = get_args()
        print(args)
        with open(args.input, "r") as input_io:
            input_data = json.load(input_io)
        param["iv"].append(list(input_data["video"].values())[0])
        param["ia"].append(list(input_data["audio"].values())[0])
        if args.output:
            param["output"] = args.output
        if args.socks5_proxy:
            if ":" in args.socks5_proxy:
                host, port = args.socks5_proxy.split(":")
                port = int(port)
            else:
                host = args.socks5_proxy
                port = 3128
            set_socks5_proxy(host, port)
        if args.http_proxy:
            set_http_proxy(args.http_proxy)
        if args.threads:
            THREADS = args.threads
        if args.pool:
            IP_POOL = args.pool
        if args.verbose:
            DEBUG = True
            # FIX: the logger level was frozen at import time, so setting
            # DEBUG here alone never enabled debug output.
            logger.setLevel(logging.DEBUG)
        if args.temp_dir:
            BASE_DIR = args.temp_dir
        if args.keep_files:
            param["delete_tmp"] = False
        if args.timeout:
            FAIL_THRESHOLD = args.timeout

        if param["output"] is None:
            if input_data is not None:
                try:
                    param["output"] = (
                        f"{date.today().strftime('%Y%m%d')} "
                        f"{sanitize_filename(input_data['metadata']['title'])} "
                        f"({input_data['metadata']['id']}).mkv"
                    )
                except Exception:
                    raise RuntimeError(
                        "JSON Version should be > 1.0, please update to the latest grabber."
                    )
            else:
                raise RuntimeError("Output param not found.")
        if pathlib.Path(param["output"]).suffix.lower() != ".mkv":
            raise RuntimeError("Output should be a mkv file.")
        if not param["ia"] or not param["iv"]:
            raise RuntimeError(
                "Input data not sufficient. Both video and audio has to be inputed."
            )
        if len(param["ia"]) != len(param["iv"]):
            raise RuntimeError("Input video and audio length mismatch.")

        # Set up the temp working directory.
        if not BASE_DIR:
            BASE_DIR = tempfile.mkdtemp(
                prefix="ytarchive_raw.",
                suffix=f".{input_data['metadata']['id']}"
                if input_data is not None
                else None,
            )
        elif os.path.isdir(BASE_DIR):
            BASE_DIR = tempfile.mkdtemp(
                prefix="ytarchive_raw.",
                suffix=f".{input_data['metadata']['id']}"
                if input_data is not None
                else None,
                dir=BASE_DIR,
            )
        else:
            os.makedirs(BASE_DIR)

        tmp_video = []
        tmp_audio = []
        video_not_merged_segs = []
        audio_not_merged_segs = []

        # Pre-create one temp output file per video/audio input.
        for i in range(len(param["iv"])):
            tmp_video_f = tempfile.NamedTemporaryFile(
                delete=False,
                prefix="ytarchive_raw.",
                suffix=f".video.{i}",
                dir=BASE_DIR,
            )
            tmp_video.append(tmp_video_f.name)
            tmp_video_f.close()

            tmp_audio_f = tempfile.NamedTemporaryFile(
                delete=False,
                prefix="ytarchive_raw.",
                suffix=f".audio.{i}",
                dir=BASE_DIR,
            )
            tmp_audio.append(tmp_audio_f.name)
            tmp_audio_f.close()

        # Download every video/audio pair concurrently.
        for i in range(len(param["iv"])):
            video_thread = threading.Thread(
                target=main,
                args=(
                    param["iv"][i],
                    tmp_video[i],
                    video_not_merged_segs,
                    f"[Video.{i}]",
                    lambda x: print(f"{bcolors.OKBLUE}{x}{bcolors.ENDC}"),
                ),
                daemon=True,
            )
            audio_thread = threading.Thread(
                target=main,
                args=(
                    param["ia"][i],
                    tmp_audio[i],
                    audio_not_merged_segs,
                    f"[Audio.{i}]",
                    lambda x: print(f"{bcolors.OKGREEN}{x}{bcolors.ENDC}"),
                ),
                daemon=True,
            )

            video_thread.start()
            audio_thread.start()

            # join with a timeout keeps the main thread responsive to Ctrl-C.
            while video_thread.is_alive():
                video_thread.join(0.5)
            while audio_thread.is_alive():
                audio_thread.join(0.5)

        if video_not_merged_segs:
            logger.warning(f"Gived up video segments: {video_not_merged_segs}")
        if audio_not_merged_segs:
            logger.warning(f"Gived up audio segments: {audio_not_merged_segs}")

        logger.info("Download finished. Merging...")

        ffmpeg_params = []
        if input_data is not None:
            # Fetch the thumbnail and attach it plus the metadata tags.
            tmp_thumbnail = None
            with urllib.request.urlopen(
                input_data["metadata"]["thumbnail"]
            ) as response:
                with tempfile.NamedTemporaryFile(
                    delete=False, prefix="ytarchive_raw.", suffix=".jpg", dir=BASE_DIR
                ) as tmp_file:
                    shutil.copyfileobj(response, tmp_file)
                    tmp_thumbnail = tmp_file.name

            ffmpeg_params = [
                "-metadata",
                'title="{}"'.format(input_data["metadata"]["title"].replace('"', "''")),
                "-metadata",
                'comment="{}"'.format(
                    input_data["metadata"]["description"].replace('"', "''")
                ),
                "-metadata",
                'author="{}"'.format(
                    input_data["metadata"]["channelName"].replace('"', "''")
                ),
                "-metadata",
                'episode_id="{}"'.format(
                    input_data["metadata"]["id"].replace('"', "''")
                ),
                "-attach",
                tmp_thumbnail,
                "-metadata:s:t",
                "mimetype=image/jpeg",
                "-metadata:s:t",
                'filename="thumbnail.jpg"',
            ]

        # have FFmpeg write the full log to a tempfile,
        # in addition to the terse log on stdout/stderr.
        # The logfile will be overwritten every time
        # so we'll keep appending the contents to ff_logtext
        with tempfile.NamedTemporaryFile(
            delete=False, prefix="ytarchive_raw.", suffix=".ffmpeg.log", dir=BASE_DIR
        ) as tmp_file:
            ff_logpath = tmp_file.name

        ff_logtext = ""
        ff_env = os.environ.copy()
        ff_env["FFREPORT"] = f"file='{ff_logpath}':level=32"  # 32=info/normal

        if len(tmp_video) == 1:
            # Single input pair: mux video + audio straight into the output.
            cmd = (
                [
                    "ffmpeg",
                    "-y",
                    "-v",
                    "warning",
                    "-i",
                    tmp_video[0],
                    "-i",
                    tmp_audio[0],
                    "-c",
                    "copy",
                ]
                + ffmpeg_params
                + [param["output"]]
            )
            logger.debug(f"ffmpeg command: {cmd}")
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ff_env
            )
            out, err = p.communicate()
            retcode = p.returncode
            ff_logtext += readfile(ff_logpath)

            if isinstance(out, bytes):
                out = out.decode(sys.stdout.encoding)
            if isinstance(err, bytes):
                err = err.decode(sys.stdout.encoding)
        else:
            # Several pairs: mux each pair, then concat the muxed parts.
            tmp_merged = []
            out = ""
            err = ""
            retcode = 0
            for i in range(len(param["iv"])):
                # delete=False so the reserved name survives until ffmpeg
                # writes to it (consistent with the other temp files).
                with tempfile.NamedTemporaryFile(
                    delete=False,
                    prefix="ytarchive_raw.",
                    suffix=f".merged.{i}.mkv",
                    dir=BASE_DIR,
                ) as tmp_merged_f:
                    tmp_merged.append(tmp_merged_f.name)

                cmd = [
                    "ffmpeg",
                    "-y",
                    "-v",
                    "warning",
                    "-i",
                    tmp_video[i],
                    "-i",
                    tmp_audio[i],
                    "-c",
                    "copy",
                    tmp_merged[i],
                ]
                # FIX: the original logged the undefined name `video_idx`,
                # raising NameError in this branch.
                logger.debug(f"ffmpeg command merging [{i}]: {cmd}")
                p = subprocess.Popen(
                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ff_env
                )

                out_i, err_i = p.communicate()
                retcode = retcode or p.returncode
                ff_logtext += readfile(ff_logpath)

                if isinstance(out_i, bytes):
                    out += out_i.decode(sys.stdout.encoding)
                if isinstance(err_i, bytes):
                    err += err_i.decode(sys.stdout.encoding)

            merged_file_list = ""
            with tempfile.NamedTemporaryFile(
                delete=False,
                prefix="ytarchive_raw.",
                suffix=".merged.txt",
                dir=BASE_DIR,
                encoding="utf-8",
                mode="w+",
            ) as tmp_file:
                data = []
                for x in tmp_merged:
                    data.append(f"file '{x}'")
                data = "\n".join(data)
                tmp_file.write(data)
                merged_file_list = tmp_file.name

            # FIX: a Windows-specific cmd was assigned here and immediately
            # overwritten (dead code); a single command is used instead.
            cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0"]

            cmd += (
                ["-v", "warning", "-i", merged_file_list, "-c", "copy"]
                + ffmpeg_params
                + [param["output"]]
            )
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ff_env
            )

            out_i, err_i = p.communicate()
            # FIX: the return code and logfile were read BEFORE communicate()
            # in the original, so retcode was always None here.
            retcode = retcode or p.returncode
            ff_logtext += readfile(ff_logpath)

            if isinstance(out_i, bytes):
                out += out_i.decode(sys.stdout.encoding)
            if isinstance(err_i, bytes):
                err += err_i.decode(sys.stdout.encoding)

        logger.debug(f"FFmpeg complete log:\n{ff_logtext}\n")

        # remove harmless warnings
        err = err.split("\n")
        for ignore in [
            "    Last message repeated ",
            "Found duplicated MOOV Atom. Skipped it",
            "Found unknown-length element with ID 0x18538067 at pos.",  # segment header
        ]:
            err = [x for x in err if ignore not in x]
        err = "\n".join(err)

        if retcode:
            logger.error(f"FFmpeg complete log:\n{ff_logtext}\n")
            logger.error(f"FFmpeg:\n{err}\n\nFailed with error {retcode}")
        elif err:
            logger.warning(f"FFmpeg:\n{err}\n\nSuccess, but with warnings")
        else:
            logger.info("All good!")

    except KeyboardInterrupt:
        logger.info("Program stopped.")

    finally:
        if param["delete_tmp"] and BASE_DIR:
            shutil.rmtree(BASE_DIR, ignore_errors=True)