├── .gitignore
├── LICENSE
├── README.md
└── index.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.mkv
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Leko
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ytarchive-raw
 2 | 
 3 | ## Description
 4 | 
 5 | This project introduces a new method to grab **Privated**, **Removed** or **any** unavailable YouTube livestreams with prepared metadata files.
 6 | 
 7 | Use with [Auto YTA](https://github.com/lekoOwO/auto-ytarchive-raw) prevent any missing livestreams!
 8 | 
 9 | ## Dependencies
10 | 
11 |  - ffmpeg
12 | 
13 |  - python3 > 3.4
14 | 
15 | ## Usage
16 | 
17 | Get freg json file using the [bookmark script](https://gist.github.com/lekoOwO/c90c09409446e6c7663c489bf06dc649).
18 | 
19 | And do `python index.py -i FREG_JSON_FILE`
20 | 
21 | TADA!
22 | 
23 | See full parameter lists by `-h`:
24 | 
25 | ```
26 | Parameters:
27 | -i, --input [JSON_FILE]     Input JSON file. Do not use with -iv or -ia.
28 | -iv, --input-video [URL]    Input video URL. Use with -ia.
29 | -ia, --input-audio [URL]    Input audio URL. Use with -iv.
30 | 
31 | -o, --output [OUTPUT_FILE]  Output file path. Uses `YYYYMMDD TITLE (VIDEO_ID).mkv` by default.
32 | -s5, --socks5-proxy [proxy] Socks5 Proxy. No schema should be provided in the proxy url. PySocks should be installed.
33 | -hp, --http-proxy [proxy]   HTTP Proxy.
34 | -t, --threads [INT]         Multi-thread download, experimental.
35 | -T, --timeout [INT]         Secs for retrying when encounter HTTP errors. Default 20.
36 | -p, --pool [FILE]           IP Pool file.
37 | -d, --temp-dir [DIR]        Temp file dir.
38 | -v, --verbose               Enable debug mode.
39 | -k, --keep-files            Keep intermediate files
40 | ```
41 | 


--------------------------------------------------------------------------------
/index.py:
--------------------------------------------------------------------------------
  1 | """This project introduces a new method to grab Privated,
  2 | Removed or any unavailable YouTube livestreams with prepared metadata files."""
  3 | import argparse
  4 | import functools
  5 | import http.client
  6 | import ipaddress
  7 | import itertools
  8 | import json
  9 | import logging
 10 | import os
 11 | import pathlib
 12 | import random
 13 | import re
 14 | import shutil
 15 | import socket
 16 | import subprocess
 17 | import sys
 18 | import tempfile
 19 | import threading
 20 | import time
 21 | import traceback
 22 | import urllib.error
 23 | import urllib.request
 24 | from argparse import Namespace
 25 | from datetime import date
 26 | from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
 27 | 
 28 | FAIL_THRESHOLD = 20
 29 | RETRY_THRESHOLD = 3
 30 | SLEEP_AFTER_FETCH_FREG = 0
 31 | DEBUG = False
 32 | THREADS = 1
 33 | IP_POOL = None
 34 | HTTP_TIMEOUT = 5
 35 | 
 36 | BASE_DIR = None
 37 | 
 38 | PBAR_LEN = 80
 39 | PBAR_SYMBOL = "█"
 40 | PBAR_EMPTY_SYMBOL = "-"
 41 | PBAR_PRINT_INTERVAL = 5
 42 | 
 43 | ACCENT_CHARS = dict(
 44 |     zip(
 45 |         "ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ",
 46 |         itertools.chain(
 47 |             "AAAAAA",
 48 |             ["AE"],
 49 |             "CEEEEIIIIDNOOOOOOO",
 50 |             ["OE"],
 51 |             "UUUUUY",
 52 |             ["TH", "ss"],
 53 |             "aaaaaa",
 54 |             ["ae"],
 55 |             "ceeeeiiiionooooooo",
 56 |             ["oe"],
 57 |             "uuuuuy",
 58 |             ["th"],
 59 |             "y",
 60 |         ),
 61 |     )
 62 | )
 63 | 
 64 | socket.setdefaulttimeout(HTTP_TIMEOUT)
 65 | # ===== utils =====
 66 | def sanitize_filename(substitution, restricted=False, is_id=False):
 67 |     """Sanitizes a string so it could be used as part of a filename.
 68 |     If restricted is set, use a stricter subset of allowed characters.
 69 |     Set is_id if this is not an arbitrary string, but an ID that should be kept
 70 |     if possible.
 71 |     """
 72 | 
 73 |     def replace_insane(char):
 74 |         if restricted and char in ACCENT_CHARS:
 75 |             return ACCENT_CHARS[char]
 76 |         if char == "?" or ord(char) < 32 or ord(char) == 127:
 77 |             return ""
 78 |         if char == '"':
 79 |             return "" if restricted else "'"
 80 |         if char == ":":
 81 |             return "_-" if restricted else " -"
 82 |         if char in "\\/|*<>":
 83 |             return "_"
 84 |         if restricted and (char in "!&'()[]{}$;`^,#" or char.isspace()):
 85 |             return "_"
 86 |         if restricted and ord(char) > 127:
 87 |             return "_"
 88 |         return char
 89 | 
 90 |     # Handle timestamps
 91 |     substitution = re.sub(
 92 |         r"[0-9]+(?::[0-9]+)+", lambda m: m.group(0).replace(":", "_"), substitution
 93 |     )
 94 |     result = "".join(map(replace_insane, substitution))
 95 |     if not is_id:
 96 |         while "__" in result:
 97 |             result = result.replace("__", "_")
 98 |         result = result.strip("_")
 99 |         # Common case of "Foreign band name - English song title"
100 |         if restricted and result.startswith("-_"):
101 |             result = result[2:]
102 |         if result.startswith("-"):
103 |             result = "_" + result[len("-") :]
104 |         result = result.lstrip(".")
105 |         if not result:
106 |             result = "_"
107 |     return result
108 | 
109 | 
110 | # ===== utils end =====
111 | 
112 | ##### Beautiful stuff #####
113 | bcolors = Namespace(
114 |     HEADER="\033[95m",
115 |     OKBLUE="\033[94m",
116 |     OKCYAN="\033[96m",
117 |     OKGREEN="\033[92m",
118 |     WARNING="\033[93m",
119 |     FAIL="\033[91m",
120 |     ENDC="\033[0m",
121 |     BOLD="\033[1m",
122 |     UNDERLINE="\033[4m",
123 | )
124 | 
125 | # Custom formatter https://stackoverflow.com/questions/1343227/
126 | class Formatter(logging.Formatter):
127 | 
128 |     err_fmt = f"{bcolors.FAIL}[ERROR] %(msg)s{bcolors.ENDC}" "ERROR: %(msg)s"
129 |     dbg_fmt = "[DEBUG] %(msg)s"
130 |     info_fmt = "[INFO] %(msg)s"
131 |     warn_fmt = f"{bcolors.WARNING}[WARN] %(msg)s{bcolors.ENDC}"
132 | 
133 |     def __init__(self, fmt="%(levelno)s: %(msg)s"):
134 |         logging.Formatter.__init__(self, fmt)
135 | 
136 |     def format(self, record):
137 | 
138 |         # Save the original format configured by the user
139 |         # when the logger formatter was instantiated
140 |         format_orig = self._fmt
141 | 
142 |         # Replace the original format with one customized by logging level
143 |         if record.levelno == logging.DEBUG:
144 |             self._fmt = self.dbg_fmt
145 | 
146 |         elif record.levelno == logging.INFO:
147 |             self._fmt = self.info_fmt
148 | 
149 |         elif record.levelno == logging.ERROR:
150 |             self._fmt = self.err_fmt
151 | 
152 |         elif record.levelno == logging.WARN:
153 |             self._fmt = self.warn_fmt
154 | 
155 |         # Call the original formatter class to do the grunt work
156 |         result = logging.Formatter.format(self, record)
157 | 
158 |         # Restore the original format configured by the user
159 |         self._fmt = format_orig
160 | 
161 |         return result
162 | 
163 | 
164 | logger = logging.getLogger(__name__)
165 | formatter = Formatter()
166 | handler = logging.StreamHandler()
167 | handler.setFormatter(formatter)
168 | logger.addHandler(handler)
169 | logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)
170 | 
171 | 
172 | class ProgressBar:
173 |     def __init__(self, total, print_func=print):
174 |         self.total = total
175 |         self.progress = []
176 |         self.progress_index = {}
177 |         self.print = print_func
178 |         self.finished = 0
179 | 
180 |         for i in range(PBAR_LEN):
181 |             x = int(total / PBAR_LEN) * (i + 1)
182 |             self.progress.append([x, False])
183 |             self.progress_index[x] = i
184 | 
185 |     def done(self, index):
186 |         if index in self.progress_index:
187 |             self.progress[self.progress_index[index]][1] = True
188 |         self.finished += 1
189 |         if not self.finished % PBAR_PRINT_INTERVAL or self.finished == self.total:
190 |             self.print_progress()
191 | 
192 |     def print_progress(self):
193 |         bar_str = ""
194 |         for x in self.progress:
195 |             bar_str += PBAR_SYMBOL if x[1] else PBAR_EMPTY_SYMBOL
196 |         self.print(bar_str, self.finished / self.total)
197 | 
198 | 
199 | ##### - Beautiful stuff - #####
200 | opener = None
201 | 
202 | 
203 | def set_http_proxy(proxy):
204 |     global opener
205 |     handler = urllib.request.ProxyHandler(
206 |         {"http": f"http://{proxy}", "https": f"http://{proxy}"}
207 |     )
208 |     opener = urllib.request.build_opener(handler)
209 | 
210 | 
211 | def set_socks5_proxy(host, port):
212 |     import socks
213 | 
214 |     socks.set_default_proxy(socks.SOCKS5, proxy, port)
215 |     socket.socket = socks.socksocket
216 | 
217 | 
218 | def get_seg_url(url, seg):
219 |     parsed_url = urlsplit(url)
220 |     qs = parse_qs(parsed_url.query)
221 | 
222 |     qs["sq"] = str(seg)
223 | 
224 |     parsed_url = list(parsed_url)
225 |     parsed_url[3] = urlencode(qs, doseq=True)
226 | 
227 |     return urlunsplit(parsed_url)
228 | 
229 | 
230 | def get_total_segment(url):
231 |     seg_url = get_seg_url(url, 0)
232 |     headers = None
233 |     try:
234 |         with urllib.request.urlopen(seg_url) as f:
235 |             headers = f.headers
236 |     except urllib.error.HTTPError as e:
237 |         headers = e.headers
238 |     return int(headers["x-head-seqnum"])
239 | 
240 | 
241 | class SegmentStatus:
242 |     def __init__(self, url, log_prefix=""):
243 |         self.segs = {}
244 |         self.merged_seg = -1
245 | 
246 |         logger.info(f"{log_prefix} Try getting total segments...")
247 |         self.end_seg = get_total_segment(url)
248 |         logger.info(f"{log_prefix} Total segments: {self.end_seg}")
249 | 
250 |         self.seg_groups = []
251 | 
252 |         # Groups
253 |         last_seg = -1
254 |         interval = int(self.end_seg / THREADS)
255 |         while True:
256 |             if last_seg + 1 + interval < self.end_seg:
257 |                 self.seg_groups.append((last_seg + 1, last_seg + 1 + interval))
258 |                 last_seg = last_seg + 1 + interval
259 |             else:
260 |                 self.seg_groups.append((last_seg + 1, self.end_seg))
261 |                 break
262 | 
263 | 
264 | ## IP Pool
265 | class BoundHTTPHandler(urllib.request.HTTPHandler):
266 |     def __init__(self, *args, source_address=None, **kwargs):
267 |         urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
268 |         self.http_class = functools.partial(
269 |             http.client.HTTPConnection,
270 |             source_address=source_address,
271 |             timeout=HTTP_TIMEOUT,
272 |         )
273 | 
274 |     def http_open(self, req):
275 |         return self.do_open(self.http_class, req)
276 | 
277 | 
278 | class BoundHTTPSHandler(urllib.request.HTTPSHandler):
279 |     def __init__(self, *args, source_address=None, **kwargs):
280 |         urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
281 |         self.https_class = functools.partial(
282 |             http.client.HTTPSConnection,
283 |             source_address=source_address,
284 |             timeout=HTTP_TIMEOUT,
285 |         )
286 | 
287 |     def https_open(self, req):
288 |         return self.do_open(
289 |             self.https_class,
290 |             req,
291 |             context=self._context,
292 |             check_hostname=self._check_hostname,
293 |         )
294 | 
295 | 
296 | def get_random_line(filepath: str) -> str:
297 |     file_size = os.path.getsize(filepath)
298 |     with open(filepath, "rb") as f:
299 |         while True:
300 |             pos = random.randint(0, file_size)
301 |             if not pos:  # the first line is chosen
302 |                 return f.readline().decode()  # return str
303 |             f.seek(pos)  # seek to random position
304 |             f.readline()  # skip possibly incomplete line
305 |             line = f.readline()  # read next (full) line
306 |             if line:
307 |                 return line.decode()
308 |             # else: line is empty -> EOF -> try another position in next iteration
309 | 
310 | 
311 | def is_ip(ip):
312 |     try:
313 |         ip = ipaddress.ip_address(ip)
314 |         return True
315 |     except ValueError:
316 |         return False
317 | 
318 | 
319 | def get_pool_ip():
320 |     if IP_POOL:
321 |         if os.path.isfile(IP_POOL):
322 |             for _ in range(3):
323 |                 ip = get_random_line(IP_POOL).rstrip().lstrip()
324 |                 if is_ip(ip):
325 |                     return ip
326 |     return None
327 | 
328 | 
329 | ## IP Pool end
330 | 
331 | 
332 | def readfile(filepath, encoding="utf-8"):
333 |     try:
334 |         with open(filepath, "r", encoding=encoding) as f:
335 |             return f.read()
336 |     except:
337 |         return ""
338 | 
339 | 
340 | def openurl(url, retry=0, source_address="random"):
341 |     global opener
342 | 
343 |     def error_handle(e):
344 |         if retry >= RETRY_THRESHOLD:
345 |             raise e
346 |         return openurl(url, retry + 1, source_address)
347 | 
348 |     try:
349 |         if opener:
350 |             return opener.open(url)
351 |         if source_address == "random":
352 |             source_address = get_pool_ip()
353 |         if not is_ip(source_address):
354 |             source_address = None
355 |         if source_address:
356 |             logger.debug(f"Using IP: {source_address}")
357 |             if isinstance(url, str):
358 |                 schema = urllib.parse.urlsplit(url).scheme
359 |             elif isinstance(url, urllib.request.Request):
360 |                 schema = urllib.parse.urlsplit(url.full_url).scheme
361 | 
362 |             handler = (BoundHTTPHandler if schema == "http" else BoundHTTPSHandler)(
363 |                 source_address=(source_address, 0)
364 |             )
365 |             return urllib.request.build_opener(handler).open(url)
366 |         return urllib.request.urlopen(url)
367 |     except (http.client.IncompleteRead, socket.timeout) as e:
368 |         error_handle(e)
369 |     except urllib.error.HTTPError as e:
370 |         raise e
371 |     except urllib.error.URLError as e:
372 |         error_handle(e)
373 |     except Exception as e:
374 |         error_handle(e)
375 | 
376 | 
377 | def download_segment(base_url, seg, seg_status, log_prefix=""):
378 |     target_url = get_seg_url(base_url, seg)
379 | 
380 |     target_url_with_header = urllib.request.Request(
381 |         target_url,
382 |         headers={
383 |             "User-Agent": (
384 |                 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
385 |                 "(KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"
386 |             )
387 |         },
388 |     )
389 | 
390 |     try:
391 |         with openurl(target_url_with_header) as response:
392 |             with tempfile.NamedTemporaryFile(
393 |                 delete=False,
394 |                 prefix="ytarchive_raw.",
395 |                 suffix=f".{seg}.seg",
396 |                 dir=BASE_DIR,
397 |             ) as tmp_file:
398 |                 shutil.copyfileobj(response, tmp_file)
399 |                 seg_status.segs[seg] = tmp_file.name
400 |             return True
401 | 
402 |     except urllib.error.HTTPError as e:
403 |         logger.debug(f"{log_prefix} Seg {seg} Failed with {e.code}")
404 |         if e.code == 403:
405 |             try:
406 |                 openurl(base_url)
407 |             except urllib.error.HTTPError:
408 |                 return False
409 |         return False
410 | 
411 |     except (http.client.IncompleteRead, socket.timeout):
412 |         return False
413 | 
414 |     except:
415 |         return False
416 | 
417 | 
418 | def merge_segs(target_file, seg_status, not_merged_segs=[], log_prefix=""):
419 |     while seg_status.merged_seg != seg_status.end_seg:
420 |         if (seg_status.merged_seg + 1) not in seg_status.segs:
421 |             logger.debug(
422 |                 f"{log_prefix} Waiting for Segment {seg_status.merged_seg + 1} ready for merging..."
423 |             )
424 |             time.sleep(1)
425 |             continue
426 | 
427 |         if seg_status.segs[seg_status.merged_seg + 1] is not None:
428 |             if os.path.exists(target_file):
429 |                 mode = "ab"
430 |             else:
431 |                 mode = "wb"
432 | 
433 |             with open(target_file, mode) as target:
434 |                 with open(seg_status.segs[seg_status.merged_seg + 1], "rb") as source:
435 |                     shutil.copyfileobj(source, target)
436 | 
437 |             try:
438 |                 os.remove(seg_status.segs[seg_status.merged_seg + 1])
439 |             except:
440 |                 pass
441 |         else:
442 |             not_merged_segs.append(seg_status.merged_seg + 1)
443 | 
444 |         seg_status.merged_seg += 1
445 |         seg_status.segs.pop(seg_status.merged_seg)
446 | 
447 | 
448 | def download_seg_group(
449 |     url, seg_group_index, seg_status, log_prefix="", post_dl_seg=lambda x: True
450 | ):
451 |     seg_range = seg_status.seg_groups[seg_group_index]
452 |     seg = seg_range[0]
453 |     fail_count = 0
454 | 
455 |     try:
456 |         while True:
457 |             if fail_count < FAIL_THRESHOLD:
458 |                 logger.debug(f"{log_prefix} Current Seg: {seg}")
459 | 
460 |                 status = download_segment(url, seg, seg_status, log_prefix)
461 | 
462 |                 if status:
463 |                     logger.debug(f"{log_prefix} Success Seg: {seg}")
464 |                     post_dl_seg(seg)
465 |                     if seg == seg_range[1]:
466 |                         return True
467 |                     seg += 1
468 |                     fail_count = 0
469 |                 else:
470 |                     fail_count += 1
471 |                     logger.debug(
472 |                         f"{log_prefix} Failed Seg: {seg} [{fail_count}/{FAIL_THRESHOLD}]"
473 |                     )
474 |                     time.sleep(1)
475 |             else:
476 |                 logger.warn(f"{log_prefix} Giving up seg: {seg}")
477 |                 seg_status.segs[seg] = None  # Skip this seg
478 |                 post_dl_seg(seg)
479 |                 if seg == seg_range[1]:
480 |                     return True
481 |                 seg += 1
482 |                 fail_count = 0
483 | 
484 |     except:
485 |         traceback.print_exc()
486 |         sys.exit(1)
487 | 
488 | 
489 | def get_args():
490 |     parser = argparse.ArgumentParser(description="")
491 |     arg_dict = {
492 |         "input": {
493 |             "switch": ["-i", "--input"],
494 |             "help": "Input JSON file.",
495 |             "type": str,
496 |         },
497 |         "output": {
498 |             "switch": ["-o", "--output"],
499 |             "help": "Output file path. Uses `YYYYMMDD TITLE (VIDEO_ID).mkv` by default.",
500 |             "type": str,
501 |         },
502 |         "socks": {
503 |             "switch": ["-s", "--socks5-proxy"],
504 |             "help": (
505 |                 "Socks5 Proxy. "
506 |                 "No schema should be provided in the proxy url. "
507 |                 "PySocks should be installed."
508 |             ),
509 |             "type": str,
510 |         },
511 |         "http-proxy": {
512 |             "switch": ["-P", "--http-proxy"],
513 |             "help": "HTTP Proxy",
514 |             "type": str,
515 |         },
516 |         "threads": {
517 |             "switch": ["-t", "--threads"],
518 |             "help": "Multi-threaded download",
519 |             "type": int,
520 |         },
521 |         "pool": {
522 |             "switch": ["-p", "--pool"],
523 |             "help": "IP Pool file.",
524 |             "type": str,
525 |         },
526 |         "temp-dir": {
527 |             "switch": ["-d", "--temp-dir"],
528 |             "help": "Directory containing the temporary files",
529 |             "type": str,
530 |         },
531 |         "timeout": {
532 |             "switch": ["-T", "--timeout"],
533 |             "help": "Secs for retrying when encounter HTTP errors. Default 20.",
534 |             "type": int,
535 |         },
536 |     }
537 |     for arg in arg_dict:
538 |         parser.add_argument(
539 |             *arg_dict[arg]["switch"],
540 |             help=arg_dict[arg]["help"],
541 |             type=arg_dict[arg]["type"],
542 |             default=None,
543 |         )
544 |     parser.add_argument(
545 |         "-v", "--verbose", help="Enable debug mode", action="store_true"
546 |     )
547 |     parser.add_argument(
548 |         "-k", "--keep-files", help="Do not delete temporary files", action="store_true"
549 |     )
550 |     args = parser.parse_args()
551 |     return args
552 | 
553 | 
554 | def main(url, target_file, not_merged_segs=[], log_prefix="", print_func=print):
555 |     seg_status = SegmentStatus(url, log_prefix)
556 |     pbar = ProgressBar(
557 |         seg_status.end_seg,
558 |         lambda bar, p: print_func(f"{log_prefix}: |{bar}| {'{:.2f}'.format(p*100)}%"),
559 |     )
560 | 
561 |     merge_thread = threading.Thread(
562 |         target=merge_segs,
563 |         args=(target_file, seg_status, not_merged_segs, log_prefix),
564 |         daemon=True,
565 |     )
566 |     merge_thread.start()
567 | 
568 |     for i in range(len(seg_status.seg_groups)):
569 |         threading.Thread(
570 |             target=download_seg_group,
571 |             args=(url, i, seg_status, log_prefix, lambda x: pbar.done(x)),
572 |             daemon=True,
573 |         ).start()
574 | 
575 |     merge_thread.join()  # Wait for merge finished
576 | 
577 | 
578 | if __name__ == "__main__":
579 |     os.system("")  # enable colors on windows
580 | 
581 |     try:
582 |         # Parse params
583 |         args = get_args()
584 |         print(args)
585 |         param = {"output": None, "iv": [], "ia": [], "delete_tmp": True}
586 |         with open(args.input, "r") as input_io:
587 |             input_data = json.load(input_io)
588 |             param["iv"].append(list(input_data["video"].values())[0])
589 |             param["ia"].append(list(input_data["audio"].values())[0])
590 |         if args.output:
591 |             param["output"] = args.output
592 |         if args.socks5_proxy:
593 |             if ":" in args.socks5_proxy:
594 |                 host, port = args.socks5_proxy.split(":")
595 |                 port = int(port)
596 |             else:
597 |                 host = args.socks5_proxy
598 |                 port = 3128
599 |             set_socks5_proxy(host, port)
600 |         if args.http_proxy:
601 |             set_http_proxy(args.http_proxy)
602 |         if args.threads:
603 |             THREADS = args.threads
604 |         if args.pool:
605 |             IP_POOL = args.pool
606 |         if args.verbose:
607 |             DEBUG = True
608 |         if args.temp_dir:
609 |             BASE_DIR = args.temp_dir
610 |         if args.keep_files:
611 |             param["delete_tmp"] = False
612 |         if args.timeout:
613 |             FAIL_THRESHOLD = args.timeout
614 | 
615 |         if param["output"] is None:
616 |             if input_data is not None:
617 |                 try:
618 |                     param["output"] = (
619 |                         f"{date.today().strftime('%Y%m%d')} "
620 |                         f"{sanitize_filename(input_data['metadata']['title'])} "
621 |                         f"({input_data['metadata']['id']}).mkv"
622 |                     )
623 |                 except Exception as e:
624 |                     raise RuntimeError(
625 |                         "JSON Version should be > 1.0, please update to the latest grabber."
626 |                     )
627 |             else:
628 |                 raise RuntimeError("Output param not found.")
629 |         if pathlib.Path(param["output"]).suffix.lower() != ".mkv":
630 |             raise RuntimeError("Output should be a mkv file.")
631 |         if not param["ia"] or not param["iv"]:
632 |             raise RuntimeError(
633 |                 "Input data not sufficient. Both video and audio has to be inputed."
634 |             )
635 |         if len(param["ia"]) != len(param["iv"]):
636 |             raise RuntimeError("Input video and audio length mismatch.")
637 | 
638 |         if not BASE_DIR:
639 |             BASE_DIR = tempfile.mkdtemp(
640 |                 prefix="ytarchive_raw.",
641 |                 suffix=f".{input_data['metadata']['id']}"
642 |                 if input_data is not None
643 |                 else None,
644 |             )
645 |         elif os.path.isdir(BASE_DIR):
646 |             BASE_DIR = tempfile.mkdtemp(
647 |                 prefix="ytarchive_raw.",
648 |                 suffix=f".{input_data['metadata']['id']}"
649 |                 if input_data is not None
650 |                 else None,
651 |                 dir=BASE_DIR,
652 |             )
653 |         else:
654 |             os.makedirs(BASE_DIR)
655 | 
656 |         tmp_video = []
657 |         tmp_audio = []
658 |         video_not_merged_segs = []
659 |         audio_not_merged_segs = []
660 | 
661 |         for i in range(len(param["iv"])):
662 |             tmp_video_f = tempfile.NamedTemporaryFile(
663 |                 delete=False,
664 |                 prefix="ytarchive_raw.",
665 |                 suffix=f".video.{i}",
666 |                 dir=BASE_DIR,
667 |             )
668 |             tmp_video.append(tmp_video_f.name)
669 |             tmp_video_f.close()
670 | 
671 |             tmp_audio_f = tempfile.NamedTemporaryFile(
672 |                 delete=False,
673 |                 prefix="ytarchive_raw.",
674 |                 suffix=f".audio.{i}",
675 |                 dir=BASE_DIR,
676 |             )
677 |             tmp_audio.append(tmp_audio_f.name)
678 |             tmp_audio_f.close()
679 | 
680 |         for i in range(len(param["iv"])):
681 |             video_thread = threading.Thread(
682 |                 target=main,
683 |                 args=(
684 |                     param["iv"][i],
685 |                     tmp_video[i],
686 |                     video_not_merged_segs,
687 |                     f"[Video.{i}]",
688 |                     lambda x: print(f"{bcolors.OKBLUE}{x}{bcolors.ENDC}"),
689 |                 ),
690 |                 daemon=True,
691 |             )
692 |             audio_thread = threading.Thread(
693 |                 target=main,
694 |                 args=(
695 |                     param["ia"][i],
696 |                     tmp_audio[i],
697 |                     audio_not_merged_segs,
698 |                     f"[Audio.{i}]",
699 |                     lambda x: print(f"{bcolors.OKGREEN}{x}{bcolors.ENDC}"),
700 |                 ),
701 |                 daemon=True,
702 |             )
703 | 
704 |             video_thread.start()
705 |             audio_thread.start()
706 | 
707 |             while video_thread.is_alive():
708 |                 video_thread.join(0.5)
709 |             while audio_thread.is_alive():
710 |                 audio_thread.join(0.5)
711 | 
712 |         if video_not_merged_segs:
713 |             logger.warn(f"Gived up video segments: {video_not_merged_segs}")
714 |         if audio_not_merged_segs:
715 |             logger.warn(f"Gived up audio segments: {audio_not_merged_segs}")
716 | 
717 |         logger.info("Download finished. Merging...")
718 | 
719 |         ffmpeg_params = []
720 |         if input_data is not None:
721 |             tmp_thumbnail = None
722 |             with urllib.request.urlopen(
723 |                 input_data["metadata"]["thumbnail"]
724 |             ) as response:
725 |                 with tempfile.NamedTemporaryFile(
726 |                     delete=False, prefix="ytarchive_raw.", suffix=".jpg", dir=BASE_DIR
727 |                 ) as tmp_file:
728 |                     shutil.copyfileobj(response, tmp_file)
729 |                     tmp_thumbnail = tmp_file.name
730 | 
731 |             ffmpeg_params = [
732 |                 "-metadata",
733 |                 'title="{}"'.format(input_data["metadata"]["title"].replace('"', "''")),
734 |                 "-metadata",
735 |                 'comment="{}"'.format(
736 |                     input_data["metadata"]["description"].replace('"', "''")
737 |                 ),
738 |                 "-metadata",
739 |                 'author="{}"'.format(
740 |                     input_data["metadata"]["channelName"].replace('"', "''")
741 |                 ),
742 |                 "-metadata",
743 |                 'episode_id="{}"'.format(
744 |                     input_data["metadata"]["id"].replace('"', "''")
745 |                 ),
746 |                 "-attach",
747 |                 tmp_thumbnail,
748 |                 "-metadata:s:t",
749 |                 "mimetype=image/jpeg",
750 |                 "-metadata:s:t",
751 |                 'filename="thumbnail.jpg"',
752 |             ]
753 | 
754 |         # have FFmpeg write the full log to a tempfile,
755 |         # in addition to the terse log on stdout/stderr.
756 |         # The logfile will be overwritten every time
757 |         # so we'll keep appending the contents to ff_logtext
758 |         with tempfile.NamedTemporaryFile(
759 |             delete=False, prefix="ytarchive_raw.", suffix=".ffmpeg.log", dir=BASE_DIR
760 |         ) as tmp_file:
761 |             ff_logpath = tmp_file.name
762 | 
763 |         ff_logtext = ""
764 |         ff_env = os.environ.copy()
765 |         ff_env["FFREPORT"] = f"file='{ff_logpath}':level=32"  # 32=info/normal
766 | 
767 |         if len(tmp_video) == 1:
768 |             cmd = (
769 |                 [
770 |                     "ffmpeg",
771 |                     "-y",
772 |                     "-v",
773 |                     "warning",
774 |                     "-i",
775 |                     tmp_video[0],
776 |                     "-i",
777 |                     tmp_audio[0],
778 |                     "-c",
779 |                     "copy",
780 |                 ]
781 |                 + ffmpeg_params
782 |                 + [param["output"]]
783 |             )
784 |             logger.debug(f"ffmpeg command: {cmd}")
785 |             p = subprocess.Popen(
786 |                 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ff_env
787 |             )
788 |             out, err = p.communicate()
789 |             retcode = p.returncode
790 |             ff_logtext += readfile(ff_logpath)
791 | 
792 |             if isinstance(out, bytes):
793 |                 out = out.decode(sys.stdout.encoding)
794 |             if isinstance(err, bytes):
795 |                 err = err.decode(sys.stdout.encoding)
796 |         else:
797 |             tmp_merged = []
798 |             out = ""
799 |             err = ""
800 |             retcode = 0
801 |             for i in range(len(param["iv"])):
802 |                 with tempfile.NamedTemporaryFile(
803 |                     prefix="ytarchive_raw.", suffix=f".merged.{i}.mkv", dir=BASE_DIR
804 |                 ) as tmp_merged_f:
805 |                     tmp_merged.append(tmp_merged_f.name)
806 | 
807 |                 cmd = [
808 |                     "ffmpeg",
809 |                     "-y",
810 |                     "-v",
811 |                     "warning",
812 |                     "-i",
813 |                     tmp_video[i],
814 |                     "-i",
815 |                     tmp_audio[i],
816 |                     "-c",
817 |                     "copy",
818 |                     tmp_merged[i],
819 |                 ]
820 |                 logger.debug(f"ffmpeg command merging [{video_idx}]: {cmd}")
821 |                 p = subprocess.Popen(
822 |                     cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ff_env
823 |                 )
824 | 
825 |                 out_i, err_i = p.communicate()
826 |                 retcode = retcode or p.returncode
827 |                 ff_logtext += readfile(ff_logpath)
828 | 
829 |                 if isinstance(out_i, bytes):
830 |                     out += out_i.decode(sys.stdout.encoding)
831 |                 if isinstance(err_i, bytes):
832 |                     err += err_i.decode(sys.stdout.encoding)
833 | 
834 |             merged_file_list = ""
835 |             with tempfile.NamedTemporaryFile(
836 |                 delete=False,
837 |                 prefix="ytarchive_raw.",
838 |                 suffix=".merged.txt",
839 |                 dir=BASE_DIR,
840 |                 encoding="utf-8",
841 |                 mode="w+",
842 |             ) as tmp_file:
843 |                 data = []
844 |                 for x in tmp_merged:
845 |                     data.append(f"file '{x}'")
846 |                 data = "\n".join(data)
847 |                 tmp_file.write(data)
848 |                 merged_file_list = tmp_file.name
849 |             if os.name == "nt":
850 |                 cmd = ["ffmpeg", "-y", "-safe", "0", "-f", "concat"]
851 |      
852 |                 cmd = ["ffmpeg", "-y", "-f", "concat", "-safe", "0"]
853 | 
854 |             cmd += (
855 |                 ["-v", "warning", "-i", merged_file_list, "-c", "copy"]
856 |                 + ffmpeg_params
857 |                 + [param["output"]]
858 |             )
859 |             p = subprocess.Popen(
860 |                 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ff_env
861 |             )
862 |             retcode = retcode or p.returncode
863 |             ff_logtext += readfile(ff_logpath)
864 | 
865 |             out_i, err_i = p.communicate()
866 | 
867 |             if isinstance(out_i, bytes):
868 |                 out += out_i.decode(sys.stdout.encoding)
869 |             if isinstance(err_i, bytes):
870 |                 err += err_i.decode(sys.stdout.encoding)
871 | 
872 |         logger.debug(f"FFmpeg complete log:\n{ff_logtext}\n")
873 | 
874 |         # remove harmless warnings
875 |         err = err.split("\n")
876 |         for ignore in [
877 |             "    Last message repeated ",
878 |             "Found duplicated MOOV Atom. Skipped it",
879 |             "Found unknown-length element with ID 0x18538067 at pos.",  # segment header
880 |         ]:
881 |             err = [x for x in err if ignore not in x]
882 |         err = "\n".join(err)
883 | 
884 |         if retcode:
885 |             logger.error(f"FFmpeg complete log:\n{ff_logtext}\n")
886 |             logger.error(f"FFmpeg:\n{err}\n\nFailed with error {retcode}")
887 |         elif err:
888 |             logger.warn(f"FFmpeg:\n{err}\n\nSuccess, but with warnings")
889 |         else:
890 |             logger.info("All good!")
891 | 
892 |     except KeyboardInterrupt as e:
893 |         logger.info("Program stopped.")
894 | 
895 |     finally:
896 |         try:
897 |             if param["delete_tmp"]:
898 |                 shutil.rmtree(BASE_DIR, ignore_errors=True)
899 |         except:
900 |             pass
901 | 


--------------------------------------------------------------------------------