├── tiktok_dl ├── version.py ├── __init__.py ├── archive.py ├── utils.py ├── schema.py ├── extractor.py ├── downloader.py └── app.py ├── requirements.txt ├── GitVersion.yml ├── Makefile ├── setup.py ├── LICENSE ├── USAGE.md ├── .gitignore ├── .flake8 ├── README.md └── .pylintrc /tiktok_dl/version.py: -------------------------------------------------------------------------------- 1 | version = "1.0.0" 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests==2.22.0 2 | loguru==0.2.5 3 | setuptools==41.4.0 4 | jsonschema==3.1.1 5 | -------------------------------------------------------------------------------- /GitVersion.yml: -------------------------------------------------------------------------------- 1 | mode: ContinuousDelivery 2 | next-version: 1.0.0 3 | branches: {} 4 | ignore: 5 | sha: [] 6 | merge-message-formats: {} 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build 2 | 3 | build: 4 | python setup.py bdist_wheel --bdist-dir ~/temp/bdistwheel 5 | 6 | install: 7 | python setup.py install -------------------------------------------------------------------------------- /tiktok_dl/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from .schema import aweme_validate 5 | from .downloader import Downloader 6 | 7 | # if somebody does "from somepackage import *", this is what they will 8 | # be able to access: 9 | __all__ = ["Downloader", "aweme_validate"] 10 | -------------------------------------------------------------------------------- /tiktok_dl/archive.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class ArchiveManager: 5 | def __init__(self, download_archive=None): 6 | self.download_archive = download_archive 7 | self.is_init = False 8 | self.archive = self._read_archive() 9 | 10 | def _read_archive(self): 11 | if os.path.isfile(self.download_archive): 12 | with open(self.download_archive) as f: 13 | data = f.read() 14 | return data.split("\n") 15 | return list() 16 | 17 | def _write_archive(self, items: list): 18 | with open(self.download_archive, "a", encoding="utf-8") as f: 19 | for video_id in items: 20 | f.write("%s\n" % video_id) 21 | 22 | def exist(self, video_id: str): 23 | return video_id in self.archive 24 | 25 | def append(self, video_id): 26 | self._write_archive(list(video_id)) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from setuptools import find_packages, setup 5 | 6 | from tiktok_dl.version import version 7 | 8 | requires = ["requests>=2.23.0", "loguru>=0.2.5", "jsonschema>=3.1.1"] 9 | 10 | with open("README.md", "r", encoding="utf-8") as f: 11 | long_description = f.read() 12 | 13 | 14 | setup( 15 | name="tiktok-dl", 16 | version=version, 17 | author="Aakash Gajjar", 18 | author_email="skyme5@gmx.com", 19 | description="TikTok video downloader", 20 | long_description=long_description, 21 | long_description_content_type="text/markdown", 22 | url="https://github.com/skyme5/tiktok-dl", 23 | packages=find_packages(exclude=["tests"]), 24 | install_requires=requires, 25 | entry_points={"console_scripts": ["tiktok-dl=tiktok_dl.app:main"],}, 26 | classifiers=[ 27 | "Programming Language :: Python :: 3", 28 | "License :: OSI Approved :: MIT License", 29 | "Operating System :: OS Independent", 30 | ], 31 | keywords="tiktok video downloader", 32 | ) 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 The Python Packaging Authority 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /USAGE.md: -------------------------------------------------------------------------------- 1 | usage: tiktok-dl [-h] [-a ARCHIVE_LOCATION] [-f FILENAME] [-o OUTPUT_TEMPLATE] 2 | [-P DIRECTORY-PREFIX] [--no-metadata-json NO_METADATA_JSON] 3 | [URL [URL ...]] 4 | 5 | TikTok Video downloader 6 | 7 | positional arguments: 8 | URL URL of the video 9 | 10 | optional arguments: 11 | -h, --help show this help message and exit 12 | -a ARCHIVE_LOCATION, --archive-location ARCHIVE_LOCATION 13 | Download only videos not listed in the archive file. 14 | Record the IDs of all downloaded videos in it. 15 | -f FILENAME, --filename FILENAME 16 | Path to a file containing a list of urls to download 17 | -o OUTPUT_TEMPLATE, --output OUTPUT_TEMPLATE 18 | Output filename template, see the "OUTPUT TEMPLATE" 19 | for all the info. 20 | -P DIRECTORY-PREFIX, --directory-prefix DIRECTORY-PREFIX 21 | Prefix path to filenames. 22 | --no-metadata-json NO_METADATA_JSON 23 | Do not create a JSON file containing the metadata of 24 | each video. 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | *.json 103 | 104 | log.txt 105 | json/ 106 | config.json 107 | organize.rb 108 | run.bat 109 | /story 110 | Pipfile.lock 111 | build.bat 112 | instagram-story_cache.txt 113 | 23.py 114 | archive*.txt 115 | list.txt 116 | *.bat 117 | response.json 118 | -------------------------------------------------------------------------------- /tiktok_dl/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | from datetime import datetime 3 | 4 | from loguru import logger 5 | from requests.exceptions import InvalidURL 6 | 7 | 8 | def format_utctime(time: int, fmt: str): 9 | return datetime.utcfromtimestamp(time).strftime(fmt) 10 | 11 | 12 | def search_regex( 13 | pattern, string: str, name: str, default=object(), fatal=True, flags=0, group=None 14 | ): 15 | """ 16 | Perform a regex search on the given string, using a single or a list of 17 | patterns returning the first matching group. 18 | In case of failure return a default value or raise a WARNING or a 19 | RegexNotFoundError, depending on fatal, specifying the field name. 20 | """ 21 | if isinstance(pattern, (str, type(re.compile("")))): 22 | mobj = re.search(pattern, string, flags) 23 | else: 24 | for p in pattern: 25 | mobj = re.search(p, string, flags) 26 | if mobj: 27 | break 28 | 29 | if mobj: 30 | if group is None: 31 | # return the first matching group 32 | return next(g for g in mobj.groups() if g is not None) 33 | else: 34 | return mobj.group(group) 35 | elif default is not default: 36 | return default 37 | elif fatal: 38 | raise re.error("Unable to extract %s" % name) 39 | else: 40 | logger.error("unable to extract {}", name) 41 | return None 42 | 43 | 44 | def valid_url_re(): 45 | return re.compile( 46 | r"https?://www\.tiktokv?\.com/(?:@[\w\._]+|share)/video/(?P\d+)" 47 | ) 48 | 49 | 50 | def match_id(url: str, valid_re): 51 | m = valid_re.match(url) 52 | if m is None: 53 | raise InvalidURL("Url is invalid {}".format(url)) 54 | if m.group("id") is None: 55 | raise re.error("unable to find video id {}".format(url)) 56 | 57 | return str(m.group("id")) 58 | 59 | 60 | def try_get(src, getter, expected_type=None): 61 | if not isinstance(getter, (list, tuple)): 62 | getter = [getter] 63 | for get in getter: 64 | try: 65 | v = get(src) 66 | except (AttributeError, KeyError, TypeError, IndexError): 67 | pass 68 | else: 69 | if expected_type is None or isinstance(v, expected_type): 70 | return v 71 | 72 | 73 | def str_or_none(v, default=None): 74 | return default if v is None else str(v) 75 | 76 | 77 | def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): 78 | if get_attr: 79 | if v is not None: 80 | v = getattr(v, get_attr, None) 81 | if v == "": 82 | v = None 83 | if v is None: 84 | return default 85 | try: 86 | return int(v) * invscale // scale 87 | except (ValueError, TypeError): 88 | return default 89 | -------------------------------------------------------------------------------- /tiktok_dl/schema.py: -------------------------------------------------------------------------------- 1 | from jsonschema import ValidationError, validate 2 | 3 | 4 | def aweme_validate(json_data: dict): 5 | SCHEMA = { 6 | "type": "object", 7 | "properties": { 8 | "id": {"type": "string"}, 9 | "play_urls": {"type": "array"}, 10 | "ext": {"type": "string"}, 11 | "width": {"type": "number"}, 12 | "height": {"type": "number"}, 13 | "duration": {"type": "number"}, 14 | "thumbnails": {"type": "array"}, 15 | "comment_count": {"type": "number"}, 16 | "digg_count": {"type": "number"}, 17 | "share_count": {"type": "number"}, 18 | "play_count": {"type": "number"}, 19 | "create_time": {"type": "number"}, 20 | "upload_date": {"type": "string"}, 21 | "title": {"type": "string"}, 22 | "description": {"type": "string"}, 23 | "nick_name": {"type": "string"}, 24 | "unique_id": {"type": "string"}, 25 | "sec_uid": {"type": "string"}, 26 | "user_id": {"type": "string"}, 27 | "user_url": {"type": "string"}, 28 | "profile_pics": {"type": "array"}, 29 | "webpage_url": {"type": "string"}, 30 | "follower_count": {"type": "number"}, 31 | "heart_total": {"type": "string"}, 32 | "challenge_list": {"type": "array"}, 33 | "duet_info": {"type": "string"}, 34 | "text_extra": {"type": "array"}, 35 | "music_id": {"type": "string"}, 36 | "music_title": {"type": "string"}, 37 | "music_artist": {"type": "string"}, 38 | "music_covers": {"type": "array"}, 39 | }, 40 | "required": [ 41 | "challenge_list", 42 | "comment_count", 43 | "create_time", 44 | "description", 45 | "digg_count", 46 | "duet_info", 47 | "duration", 48 | "ext", 49 | "follower_count", 50 | "heart_total", 51 | "height", 52 | "id", 53 | "music_artist", 54 | "music_covers", 55 | "music_id", 56 | "music_title", 57 | "nick_name", 58 | "play_count", 59 | "play_urls", 60 | "profile_pics", 61 | "sec_uid", 62 | "share_count", 63 | "text_extra", 64 | "thumbnails", 65 | "title", 66 | "unique_id", 67 | "upload_date", 68 | "user_id", 69 | "user_url", 70 | "webpage_url", 71 | "width", 72 | ], 73 | } 74 | 75 | try: 76 | validate(instance=json_data, schema=SCHEMA) 77 | except ValidationError as e: 78 | print("Error validating json_data", e) 79 | -------------------------------------------------------------------------------- /tiktok_dl/extractor.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from tiktok_dl.utils import int_or_none, str_or_none, try_get 4 | 5 | 6 | def aweme_extractor(video_data: dict): 7 | video_info = try_get(video_data, lambda x: x["videoData"]["itemInfos"], dict) 8 | author_info = try_get(video_data, lambda x: x["videoData"]["authorInfos"], dict) 9 | share_info = try_get(video_data, lambda x: x["shareMeta"], dict) 10 | music_info = try_get(video_data, lambda x: x["videoData"]["musicInfos"], dict) 11 | author_stats = try_get(video_data, lambda x: x["videoData"]["authorStats"], dict) 12 | 13 | unique_id = str_or_none(author_info.get("uniqueId")) 14 | timestamp = try_get(video_info, lambda x: int(x["createTime"]), int) 15 | date = datetime.utcfromtimestamp(timestamp).strftime("%Y%m%d") 16 | 17 | height = try_get(video_info, lambda x: x["video"]["videoMeta"]["height"], int) 18 | width = try_get(video_info, lambda x: x["video"]["videoMeta"]["width"], int) 19 | 20 | return { 21 | "id": str_or_none(video_info.get("id")), 22 | "play_urls": try_get(video_info, lambda x: x["video"]["urls"], list), 23 | "ext": "mp4", 24 | "width": width, 25 | "height": height, 26 | "duration": try_get( 27 | video_info, lambda x: x["video"]["videoMeta"]["duration"], int 28 | ), 29 | "thumbnails": try_get(video_info, lambda x: x["covers"], list), 30 | "comment_count": int_or_none(video_info.get("commentCount")), 31 | "digg_count": int_or_none(video_info.get("diggCount")), 32 | "share_count": int_or_none(video_info.get("shareCount")), 33 | "play_count": int_or_none(video_info.get("playCount")), 34 | "create_time": timestamp, 35 | "upload_date": date, 36 | "title": "{} on TikTok".format(str_or_none(author_info.get("nickName"))), 37 | "description": str_or_none(share_info.get("desc")), 38 | "nick_name": str_or_none(author_info.get("nickName")), 39 | "unique_id": unique_id, 40 | "sec_uid": str_or_none(author_info.get("secUid")), 41 | "user_id": str_or_none(author_info.get("userId")), 42 | "user_url": "https://www.tiktok.com/@" + unique_id, 43 | "profile_pics": try_get(author_info, lambda x: x["covers"], list), 44 | "webpage_url": "https://www.tiktok.com/@{}/video/{}?source=h5_t".format( 45 | str_or_none(author_info.get("uniqueId")), str_or_none(video_info.get("id")) 46 | ), 47 | "follower_count": int_or_none(author_stats.get("followerCount")), 48 | "heart_total": str_or_none(author_stats.get("heartCount")), 49 | "challenge_list": try_get( 50 | video_data, lambda x: x["videoData"]["challengeInfoList"], list 51 | ), 52 | "duet_info": try_get(video_data, lambda x: x["videoData"]["duetInfo"], str), 53 | "text_extra": try_get(video_data, lambda x: x["videoData"]["textExtra"], list), 54 | "music_id": str_or_none(music_info.get("musicId")), 55 | "music_title": str_or_none(music_info.get("musicName")), 56 | "music_artist": str_or_none(music_info.get("authorName")), 57 | "music_covers": try_get(music_info, lambda x: x["covers"], list), 58 | } 59 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | # .flake8 2 | # 3 | # DESCRIPTION 4 | # Configuration file for the python linter flake8. 5 | # 6 | # This configuration is based on the generic 7 | # configuration published on GitHub. 8 | # 9 | # AUTHOR 10 | # krnd 11 | # 12 | # VERSION 13 | # 1.0 14 | # 15 | # SEE ALSO 16 | # http://flake8.pycqa.org/en/latest/user/options.html 17 | # http://flake8.pycqa.org/en/latest/user/error-codes.html 18 | # https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes 19 | # https://gist.github.com/krnd 20 | # 21 | 22 | 23 | [flake8] 24 | 25 | ################### PROGRAM ################################ 26 | 27 | # Specify the number of subprocesses that Flake8 will use to run checks in parallel. 28 | jobs = auto 29 | 30 | 31 | ################### OUTPUT ################################# 32 | 33 | ########## VERBOSITY ########## 34 | 35 | # Increase the verbosity of Flake8’s output. 36 | verbose = 0 37 | # Decrease the verbosity of Flake8’s output. 38 | quiet = 0 39 | 40 | 41 | ########## FORMATTING ########## 42 | 43 | # Select the formatter used to display errors to the user. 44 | format = default 45 | 46 | # Print the total number of errors. 47 | count = True 48 | # Print the source code generating the error/warning in question. 49 | show-source = True 50 | # Count the number of occurrences of each error/warning code and print a report. 51 | statistics = True 52 | 53 | 54 | ########## TARGETS ########## 55 | 56 | # Redirect all output to the specified file. 57 | output-file = .flake8.log 58 | # Also print output to stdout if output-file has been configured. 59 | tee = True 60 | 61 | 62 | ################### FILE PATTERNS ########################## 63 | 64 | # Provide a comma-separated list of glob patterns to exclude from checks. 65 | exclude = 66 | # git folder 67 | .git, 68 | # python cache 69 | __pycache__, 70 | # Provide a comma-separate list of glob patterns to include for checks. 71 | filename = 72 | *.py 73 | 74 | 75 | ################### LINTING ################################ 76 | 77 | ########## ENVIRONMENT ########## 78 | 79 | # Provide a custom list of builtin functions, objects, names, etc. 80 | builtins = 81 | 82 | 83 | ########## OPTIONS ########## 84 | 85 | # Report all errors, even if it is on the same line as a `# NOQA` comment. 86 | disable-noqa = False 87 | 88 | # Set the maximum length that any line (with some exceptions) may be. 89 | max-line-length = 100 90 | # Set the maximum allowed McCabe complexity value for a block of code. 91 | max-complexity = 10 92 | # Toggle whether pycodestyle should enforce matching the indentation of the opening bracket’s line. 93 | # incluences E131 and E133 94 | hang-closing = True 95 | 96 | 97 | ########## RULES ########## 98 | 99 | # ERROR CODES 100 | # 101 | # E/W - PEP8 errors/warnings (pycodestyle) 102 | # F - linting errors (pyflakes) 103 | # C - McCabe complexity error (mccabe) 104 | # 105 | # W503 - line break before binary operator 106 | 107 | # Specify a list of codes to ignore. 108 | ignore = 109 | W503 110 | # Specify the list of error codes you wish Flake8 to report. 111 | select = 112 | E, 113 | W, 114 | F, 115 | C 116 | # Enable off-by-default extensions. 117 | enable-extensions = 118 | 119 | 120 | ########## DOCSTRING ########## 121 | 122 | # Enable PyFlakes syntax checking of doctests in docstrings. 123 | doctests = True 124 | 125 | # Specify which files are checked by PyFlakes for doctest syntax. 126 | include-in-doctest = 127 | # Specify which files are not to be checked by PyFlakes for doctest syntax. 128 | exclude-in-doctest = 129 | -------------------------------------------------------------------------------- /tiktok_dl/downloader.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import re 4 | import time 5 | 6 | import requests 7 | import urllib3 8 | from loguru import logger 9 | from tiktok_dl.extractor import aweme_extractor 10 | from tiktok_dl.schema import aweme_validate 11 | from tiktok_dl.utils import ( 12 | format_utctime, 13 | match_id, 14 | search_regex, 15 | try_get, 16 | valid_url_re, 17 | ) 18 | from tiktok_dl.version import version 19 | 20 | 21 | class URLExistsInArchive(Exception): 22 | pass 23 | 24 | 25 | class Downloader: 26 | def __init__( 27 | self, 28 | directory_prefix=None, 29 | dump_json=False, 30 | get_url=False, 31 | max_sleep_interval=0, 32 | no_check_certificate=False, 33 | no_overwrite=False, 34 | no_warnings=False, 35 | no_write_json=False, 36 | output_template="{Y}-{d}-{m}_{H}-{M}-{S} {id}_{user_id}", 37 | print_json=False, 38 | quiet=False, 39 | simulate=False, 40 | skip_download=False, 41 | sleep_interval=0.2, 42 | verbose=True, 43 | write_description=False, 44 | write_thumbnail=True, 45 | urls=None, 46 | ): 47 | self.directory_prefix = directory_prefix 48 | self.dump_json = dump_json 49 | self.get_url = get_url 50 | self.max_sleep_interval = max_sleep_interval 51 | self.no_check_certificate = no_check_certificate 52 | self.no_overwrite = no_overwrite 53 | self.no_warnings = no_warnings 54 | self.no_write_json = no_write_json 55 | self.output_template = output_template 56 | self.print_json = print_json 57 | self.quiet = quiet 58 | self.simulate = simulate 59 | self.skip_download = skip_download 60 | self.sleep_interval = sleep_interval 61 | self.verbose = verbose 62 | self.write_description = write_description 63 | self.write_thumbnail = write_thumbnail 64 | self.urls = urls 65 | 66 | self.headers = { 67 | "user-agent": ( 68 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " 69 | "AppleWebKit/537.36 (KHTML, like Gecko) " 70 | "Chrome/83.0.4103.44 Safari/537.36" 71 | ) 72 | } 73 | self.reaponse_ok = requests.codes.get("ok") 74 | # urllib3.disable_warnings() 75 | 76 | def _parse_json(self, json_string: str, video_id: str, fatal=True): 77 | try: 78 | return json.loads(json_string) 79 | except ValueError as ve: 80 | errmsg = "{}: Failed to parse JSON ".format(video_id) 81 | if fatal: 82 | raise Exception(errmsg, cause=ve) 83 | else: 84 | logger.error(errmsg + str(ve)) 85 | 86 | def _download_webpage(self, url: str, video_id: str, note="Downloading webpage"): 87 | logger.debug("{} {}", note, video_id) 88 | r = requests.get(url, verify=False, headers=self.headers) 89 | return r.text 90 | 91 | def _fetch_data(self, url: str): 92 | video_id = match_id(url, valid_url_re) 93 | 94 | webpage = self._download_webpage( 95 | url, video_id, note="Downloading video webpage" 96 | ) 97 | json_string = search_regex( 98 | r"id=\"__NEXT_DATA__\"\s+type=\"application\/json\"\s*[^>]+>\s*(?P[^<]+)", 99 | webpage, 100 | "json_string", 101 | group="json_string_id", 102 | ) 103 | json_data = self._parse_json(json_string, video_id) 104 | aweme_data = try_get( 105 | json_data, lambda x: x["props"]["pageProps"], expected_type=dict 106 | ) 107 | 108 | if aweme_data.get("statusCode") != 0: 109 | raise FileNotFoundError("Video not available " + video_id) 110 | 111 | return { 112 | "video_data": aweme_extractor(video_data=aweme_data), 113 | "aweme_data": aweme_data, 114 | "tiktok-dl": version, 115 | "timestamp": int(time.time()), 116 | } 117 | 118 | def _expand_path(self, path): 119 | if self.directory_prefix is None: 120 | return path 121 | return os.path.join(self.directory_prefix, path) 122 | 123 | def _output_format(self, json_data: dict): 124 | def enhance_json_data(json_data): 125 | data = dict(json_data) 126 | timestamp = data.get("create_time") 127 | data["Y"] = format_utctime(time=timestamp, fmt="%Y") 128 | data["m"] = format_utctime(time=timestamp, fmt="%m") 129 | data["d"] = format_utctime(time=timestamp, fmt="%d") 130 | data["H"] = format_utctime(time=timestamp, fmt="%H") 131 | data["M"] = format_utctime(time=timestamp, fmt="%M") 132 | data["S"] = format_utctime(time=timestamp, fmt="%S") 133 | return data 134 | 135 | enhanced = enhance_json_data(json_data) 136 | return self.output_template.format(**enhanced) 137 | 138 | def _save_json(self, data: dict, dest: str): 139 | if not os.path.exists(os.path.dirname(dest)): 140 | os.makedirs(os.path.dirname(dest)) 141 | 142 | with open(dest, "w", encoding="utf-8") as f: 143 | json.dump(data, f, ensure_ascii=False) 144 | 145 | def _download_url(self, url: str, dest: str): 146 | if not os.path.exists(os.path.dirname(dest)): 147 | os.makedirs(os.path.dirname(dest), exist_ok=True) 148 | 149 | try: 150 | if os.path.getsize(dest) == 0: 151 | os.remove(dest) 152 | except FileNotFoundError: 153 | pass 154 | 155 | try: 156 | with open(dest, "xb") as handle: 157 | response = requests.get(url, stream=True, timeout=160) 158 | if response.status_code != self.reaponse_ok: 159 | response.raise_for_status() 160 | 161 | logger.debug("Downloading to {}".format(dest)) 162 | for data in response.iter_content(chunk_size=4194304): 163 | handle.write(data) 164 | handle.close() 165 | except FileExistsError: 166 | pass 167 | except requests.exceptions.RequestException: 168 | logger.error("File {} not found on Server {}".format(dest, url)) 169 | pass 170 | 171 | if os.path.getsize(dest) == 0: 172 | os.remove(dest) 173 | 174 | def _download_media(self, video_data: dict, filepath: str): 175 | video_url = video_data["play_urls"][0] 176 | self._download_url(video_url, self._expand_path(filepath + ".mp4")) 177 | cover_url = video_data["thumbnails"][0] 178 | self._download_url(cover_url, self._expand_path(filepath + ".jpg")) 179 | 180 | def download(self, url: str): 181 | try: 182 | data = self._fetch_data(url) 183 | aweme_validate(data.get("video_data")) 184 | filepath = self._output_format(data.get("video_data")) 185 | self._download_media(data.get("video_data"), filepath) 186 | self._save_json(data, self._expand_path(filepath + ".json")) 187 | except requests.exceptions.InvalidURL as e: 188 | logger.error(e) 189 | pass 190 | except ConnectionError as e: 191 | logger.error(e) 192 | pass 193 | except re.error as e: 194 | logger.error(e) 195 | pass 196 | except FileNotFoundError as e: 197 | logger.warning(e) 198 | pass 199 | -------------------------------------------------------------------------------- /tiktok_dl/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | 7 | from loguru import logger 8 | from tiktok_dl.downloader import Downloader 9 | from tiktok_dl.utils import match_id, valid_url_re 10 | from tiktok_dl.version import version 11 | 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser( 15 | description="TikTok Video downloader", 16 | usage="Usage: tiktok-dl [options] URL [URL...]", 17 | ) 18 | 19 | parser.add_argument( 20 | "-V", 21 | "--version", 22 | action="version", 23 | version=version, 24 | help="Print program version and exit", 25 | ) 26 | 27 | parser.add_argument( 28 | "urls", metavar="URL", nargs="*", type=str, help="URL of the video" 29 | ) 30 | 31 | video_selection_group = parser.add_argument_group("Video Selection") 32 | video_selection_group.add_argument( 33 | "--download-archive", 34 | metavar="DOWNLOAD_ARCHIVE", 35 | type=str, 36 | dest="download_archive", 37 | default=None, 38 | help="Download only videos not listed in the archive file. " 39 | "Record the IDs of all downloaded videos in it.", 40 | ) 41 | 42 | parallel_download_group = parser.add_argument_group("Parallel Download") 43 | parallel_download_group.add_argument( 44 | "-d", 45 | "--daemon", 46 | action="store_true", 47 | dest="daemon", 48 | help="Run as daemon.", 49 | ) 50 | parallel_download_group.add_argument( 51 | "-p", 52 | "--concurrent-count", 53 | metavar="CONCURRENT_COUNT", 54 | type=int, 55 | dest="concurrent_count", 56 | default=2, 57 | help="Download videos in parallel.", 58 | ) 59 | 60 | filesystem_group = parser.add_argument_group("Filesystem Options") 61 | filesystem_group.add_argument( 62 | "-a", 63 | "--batch-file", 64 | metavar="FILENAME", 65 | type=str, 66 | dest="batch_file", 67 | default=None, 68 | help="File containing URLs to download ('-' for stdin), one URL per line. " 69 | "Lines starting with '#', ';' or ']' are considered as comments and ignored.", 70 | ) 71 | filesystem_group.add_argument( 72 | "-o", 73 | "--output", 74 | metavar="OUTPUT_TEMPLATE", 75 | type=str, 76 | dest="output_template", 77 | default="{Y}-{d}-{m}_{H}-{M}-{S} {id}_{user_id}", 78 | help='Output filename template, see the "OUTPUT TEMPLATE" for all the info.', 79 | ) 80 | filesystem_group.add_argument( 81 | "-w", 82 | "--no-overwrites", 83 | action="store_true", 84 | dest="no_overwrite", 85 | default=False, 86 | help="Do not overwrite files", 87 | ) 88 | filesystem_group.add_argument( 89 | "--write-description", 90 | action="store_true", 91 | dest="write_description", 92 | help="Write video description to a .description file.", 93 | ) 94 | filesystem_group.add_argument( 95 | "--no-write-json", 96 | action="store_true", 97 | dest="no_write_json", 98 | default=False, 99 | help="Write video metadata to a .json file.", 100 | ) 101 | filesystem_group.add_argument( 102 | "-P", 103 | "--directory-prefix", 104 | metavar="DIRECTORY_PREFIX", 105 | type=str, 106 | dest="directory_prefix", 107 | default=None, 108 | help="Directory prefix.", 109 | ) 110 | 111 | thumbnail_group = parser.add_argument_group("Thumbnail images") 112 | thumbnail_group.add_argument( 113 | "--write-thumbnail", 114 | action="store_true", 115 | dest="write_thumbnail", 116 | default=True, 117 | help="Write thumbnail image to disk.", 118 | ) 119 | 120 | simulation_group = parser.add_argument_group("Verbosity / Simulation Options:") 121 | simulation_group.add_argument( 122 | "-q", 123 | "--quiet", 124 | dest="quiet", 125 | action="store_true", 126 | default=False, 127 | help="Activate quiet mode.", 128 | ) 129 | simulation_group.add_argument( 130 | "--no-warnings", 131 | action="store_true", 132 | dest="no_warnings", 133 | default=False, 134 | help="Ignore warnings.", 135 | ) 136 | simulation_group.add_argument( 137 | "-s", 138 | "--simulate", 139 | action="store_true", 140 | dest="simulate", 141 | default=False, 142 | help="Do not download the video and do not write anything to disk.", 143 | ) 144 | simulation_group.add_argument( 145 | "--skip-download", 146 | action="store_true", 147 | dest="skip_download", 148 | default=False, 149 | help="Do not download the video.", 150 | ) 151 | simulation_group.add_argument( 152 | "-g", 153 | "--get-url", 154 | action="store_true", 155 | dest="get_url", 156 | default=False, 157 | help="Simulate, quiet but print URL.", 158 | ) 159 | simulation_group.add_argument( 160 | "-j", 161 | "--dump-json", 162 | action="store_true", 163 | dest="dump_json", 164 | default=False, 165 | help="Simulate, quiet but print JSON information. " 166 | 'See the "OUTPUT TEMPLATE" for a description of available keys.', 167 | ) 168 | simulation_group.add_argument( 169 | "--print-json", 170 | action="store_true", 171 | dest="print_json", 172 | default=False, 173 | help="Be quiet and print the video information as JSON (video is still being downloaded).", 174 | ) 175 | simulation_group.add_argument( 176 | "-v", 177 | "--verbose", 178 | action="store_false", 179 | dest="verbose", 180 | default=True, 181 | help="Print various debugging information.", 182 | ) 183 | 184 | workarounds_group = parser.add_argument_group("Workarounds") 185 | workarounds_group.add_argument( 186 | "--no-check-certificate", 187 | action="store_true", 188 | dest="no_check_certificate", 189 | default=False, 190 | help="Suppress HTTPS certificate validation.", 191 | ) 192 | workarounds_group.add_argument( 193 | "--sleep-interval", 194 | metavar="SLEEP_INTERVAL", 195 | type=float, 196 | dest="sleep_interval", 197 | default=0.2, 198 | help="Number of seconds to sleep before each download when used alone or " 199 | "a lower bound of a range for randomized sleep before each download " 200 | "(minimum possible number of seconds to sleep) when used along with " 201 | "--max-sleep-interval.", 202 | ) 203 | workarounds_group.add_argument( 204 | "--max-sleep-interval", 205 | metavar="MAX_SLEEP_INTERVAL", 206 | type=float, 207 | dest="max_sleep_interval", 208 | default=0, 209 | help="Upper bound of a range for randomized sleep before each " 210 | "download (maximum possible number of seconds to sleep). " 211 | "Must only be used along with --min-sleep-interval.", 212 | ) 213 | parser.set_defaults( 214 | batch_file=None, 215 | concurrent_count=1, 216 | daemon=False, 217 | directory_prefix=None, 218 | download_archive=None, 219 | dump_json=False, 220 | get_url=False, 221 | max_sleep_interval=0, 222 | no_check_certificate=False, 223 | no_overwrite=False, 224 | no_warnings=False, 225 | no_write_json=False, 226 | output_template="{Y}-{d}-{m}_{H}-{M}-{S} {id}_{user_id}", 227 | print_json=False, 228 | quiet=False, 229 | simulate=False, 230 | skip_download=False, 231 | sleep_interval=0.2, 232 | urls=[], 233 | verbose=True, 234 | write_description=False, 235 | write_thumbnail=True, 236 | ) 237 | 238 | args = parser.parse_args() 239 | 240 | if len(args.urls) == 0 and args.batch_file is None: 241 | parser.error("URL or file containing list of URLs (--batch-file) is required.") 242 | 243 | if args.batch_file is not None and os.path.isfile(args.batch_file): 244 | with open(args.batch_file, "r") as f: 245 | for url in f.read().split("\n"): 246 | if len(url.strip()) > 0: 247 | args.urls.append(url.strip()) 248 | 249 | logger.info('Downloading {} urls', len(args.urls)) 250 | 251 | t = Downloader( 252 | directory_prefix=args.directory_prefix, 253 | dump_json=args.dump_json, 254 | get_url=args.get_url, 255 | max_sleep_interval=args.max_sleep_interval, 256 | no_check_certificate=args.no_check_certificate, 257 | no_overwrite=args.no_overwrite, 258 | no_warnings=args.no_warnings, 259 | no_write_json=args.no_write_json, 260 | output_template=args.output_template, 261 | print_json=args.print_json, 262 | quiet=args.quiet, 263 | simulate=args.simulate, 264 | skip_download=args.skip_download, 265 | sleep_interval=args.sleep_interval, 266 | verbose=args.verbose, 267 | write_description=args.write_description, 268 | write_thumbnail=args.write_thumbnail, 269 | ) 270 | 271 | for url in args.urls: 272 | t.download(url) 273 | 274 | 275 | if __name__ == "__main__": 276 | main() 277 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl) 2 | 3 | tiktok-dl - download videos from tiktok.com 4 | 5 | - [INSTALLATION](#installation) 6 | - [DESCRIPTION](#description) 7 | - [OPTIONS](#options) 8 | - [Video Selection:](#video-selection) 9 | - [Filesystem Options:](#filesystem-options) 10 | - [Thumbnail images:](#thumbnail-images) 11 | - [Verbosity / Simulation Options:](#verbosity--simulation-options) 12 | - [Workarounds:](#workarounds) 13 | - [OUTPUT TEMPLATE](#output-template) 14 | - [Output template and Windows batch files](#output-template-and-windows-batch-files) 15 | - [Output template examples](#output-template-examples) 16 | 17 | # INSTALLATION 18 | 19 | To install tiktok-dl: 20 | 21 | ```bash 22 | $ pip install tiktok-dl 23 | ``` 24 | 25 | To update tiktok-dl: 26 | 27 | ```bash 28 | $ pip install tiktok-dl --upgrade 29 | ``` 30 | 31 | Alternatively, you can clone the project and run the following command to install: 32 | Make sure you cd into the *tiktok-dl-master* folder before performing the command below. 33 | 34 | ```bash 35 | $ python setup.py install 36 | ``` 37 | 38 | # DESCRIPTION 39 | **tiktok-dl** is a command-line program to download videos from tiktok.com. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. 40 | 41 | ```bash 42 | tiktok-dl [OPTIONS] URL [URL...] 43 | ``` 44 | 45 | # OPTIONS 46 | 47 | ```bash 48 | -h, --help Print this help text and exit 49 | --version Print program version and exit 50 | ``` 51 | 52 | ## Video Selection: 53 | 54 | ```bash 55 | --download-archive FILE Download only videos not listed in the 56 | archive file. Record the IDs of all 57 | downloaded videos in it. 58 | ``` 59 | 60 | ## Filesystem Options: 61 | 62 | ```bash 63 | -a, --batch-file FILE File containing URLs to download ('-' for 64 | stdin), one URL per line. Lines starting 65 | with '#', ';' or ']' are considered as 66 | comments and ignored. 67 | -o, --output TEMPLATE Output filename template, see the "OUTPUT 68 | TEMPLATE" for all the info 69 | -w, --no-overwrites Do not overwrite files 70 | --write-description Write video description to a .description 71 | file 72 | --write-info-json Write video metadata to a .info.json file 73 | ``` 74 | 75 | ## Thumbnail images: 76 | 77 | ```bash 78 | --write-thumbnail Write thumbnail image to disk 79 | ``` 80 | 81 | ## Verbosity / Simulation Options: 82 | 83 | ```bash 84 | -q, --quiet Activate quiet mode 85 | --no-warnings Ignore warnings 86 | -s, --simulate Do not download the video and do not write 87 | anything to disk 88 | --skip-download Do not download the video 89 | -g, --get-url Simulate, quiet but print URL 90 | -e, --get-title Simulate, quiet but print title 91 | --get-id Simulate, quiet but print id 92 | --get-thumbnail Simulate, quiet but print thumbnail URL 93 | --get-description Simulate, quiet but print video description 94 | --get-duration Simulate, quiet but print video length 95 | --get-filename Simulate, quiet but print output filename 96 | -j, --dump-json Simulate, quiet but print JSON information. 97 | See the "OUTPUT TEMPLATE" for a description 98 | of available keys. 99 | --print-json Be quiet and print the video information as 100 | JSON (video is still being downloaded). 101 | -v, --verbose Print various debugging information 102 | ``` 103 | 104 | ## Workarounds: 105 | 106 | ```bash 107 | --no-check-certificate Suppress HTTPS certificate validation 108 | --sleep-interval SECONDS Number of seconds to sleep before each 109 | download when used alone or a lower bound 110 | of a range for randomized sleep before each 111 | download (minimum possible number of 112 | seconds to sleep) when used along with 113 | --max-sleep-interval. 114 | --max-sleep-interval SECONDS Upper bound of a range for randomized sleep 115 | before each download (maximum possible 116 | number of seconds to sleep). Must only be 117 | used along with --min-sleep-interval. 118 | ``` 119 | 120 | # OUTPUT TEMPLATE 121 | 122 | The `-o` option allows users to indicate a template for the output file names. 123 | 124 | **tl;dr:** [navigate me to examples](#output-template-examples). 125 | 126 | The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are: 127 | 128 | - `id` (string): Video identifier 129 | - `title` (string): Video title 130 | - `url` (string): Video URL 131 | - `ext` (string): Video filename extension 132 | - `alt_title` (string): A secondary title of the video 133 | - `display_id` (string): An alternative identifier for the video 134 | - `uploader` (string): Full name of the video uploader 135 | - `license` (string): License name the video is licensed under 136 | - `creator` (string): The creator of the video 137 | - `release_date` (string): The date (YYYYMMDD) when the video was released 138 | - `timestamp` (numeric): UNIX timestamp of the moment the video became available 139 | - `upload_date` (string): Video upload date (YYYYMMDD) 140 | - `uploader_id` (string): Nickname or id of the video uploader 141 | - `channel` (string): Full name of the channel the video is uploaded on 142 | - `channel_id` (string): Id of the channel 143 | - `location` (string): Physical location where the video was filmed 144 | - `duration` (numeric): Length of the video in seconds 145 | - `view_count` (numeric): How many users have watched the video on the platform 146 | - `like_count` (numeric): Number of positive ratings of the video 147 | - `dislike_count` (numeric): Number of negative ratings of the video 148 | - `repost_count` (numeric): Number of reposts of the video 149 | - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage 150 | - `comment_count` (numeric): Number of comments on the video 151 | - `age_limit` (numeric): Age restriction for the video (years) 152 | - `is_live` (boolean): Whether this video is a live stream or a fixed-length video 153 | - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL 154 | - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL 155 | - `format` (string): A human-readable description of the format 156 | - `format_id` (string): Format code specified by `--format` 157 | - `format_note` (string): Additional info about the format 158 | - `width` (numeric): Width of the video 159 | - `height` (numeric): Height of the video 160 | - `resolution` (string): Textual description of width and height 161 | - `tbr` (numeric): Average bitrate of audio and video in KBit/s 162 | - `abr` (numeric): Average audio bitrate in KBit/s 163 | - `acodec` (string): Name of the audio codec in use 164 | - `asr` (numeric): Audio sampling rate in Hertz 165 | - `vbr` (numeric): Average video bitrate in KBit/s 166 | - `fps` (numeric): Frame rate 167 | - `vcodec` (string): Name of the video codec in use 168 | - `container` (string): Name of the container format 169 | - `filesize` (numeric): The number of bytes, if known in advance 170 | - `filesize_approx` (numeric): An estimate for the number of bytes 171 | - `protocol` (string): The protocol that will be used for the actual download 172 | - `extractor` (string): Name of the extractor 173 | - `extractor_key` (string): Key name of the extractor 174 | - `epoch` (numeric): Unix epoch when creating the file 175 | - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero 176 | - `playlist` (string): Name or id of the playlist that contains the video 177 | - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist 178 | - `playlist_id` (string): Playlist identifier 179 | - `playlist_title` (string): Playlist title 180 | - `playlist_uploader` (string): Full name of the playlist uploader 181 | - `playlist_uploader_id` (string): Nickname or id of the playlist uploader 182 | 183 | Available for the video that belongs to some logical chapter or section: 184 | 185 | - `chapter` (string): Name or title of the chapter the video belongs to 186 | - `chapter_number` (numeric): Number of the chapter the video belongs to 187 | - `chapter_id` (string): Id of the chapter the video belongs to 188 | 189 | Available for the video that is an episode of some series or programme: 190 | 191 | - `series` (string): Title of the series or programme the video episode belongs to 192 | - `season` (string): Title of the season the video episode belongs to 193 | - `season_number` (numeric): Number of the season the video episode belongs to 194 | - `season_id` (string): Id of the season the video episode belongs to 195 | - `episode` (string): Title of the video episode 196 | - `episode_number` (numeric): Number of the video episode within a season 197 | - `episode_id` (string): Id of the video episode 198 | 199 | Available for the media that is a track or a part of a music album: 200 | 201 | - `track` (string): Title of the track 202 | - `track_number` (numeric): Number of the track within an album or a disc 203 | - `track_id` (string): Id of the track 204 | - `artist` (string): Artist(s) of the track 205 | - `genre` (string): Genre(s) of the track 206 | - `album` (string): Title of the album the track belongs to 207 | - `album_type` (string): Type of the album 208 | - `album_artist` (string): List of all artists appeared on the album 209 | - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to 210 | - `release_year` (numeric): Year (YYYY) when the album was released 211 | 212 | Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`. 213 | 214 | For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory. 215 | 216 | For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. 217 | 218 | Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. 219 | 220 | To use percent literals in an output template use `%%`. To output to stdout use `-o -`. 221 | 222 | The current default template is `%(title)s-%(id)s.%(ext)s`. 223 | 224 | In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title: 225 | 226 | #### Output template and Windows batch files 227 | 228 | If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`. 229 | 230 | #### Output template examples 231 | 232 | Note that on Windows you may need to use double quotes instead of single. 233 | 234 | ```bash 235 | $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc 236 | youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters 237 | 238 | $ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames 239 | youtube-dl_test_video_.mp4 # A simple file name 240 | 241 | # Download YouTube playlist videos in separate directory indexed by video order in a playlist 242 | $ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re 243 | 244 | # Download all playlists of YouTube channel/user keeping each playlist in separate directory: 245 | $ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists 246 | 247 | # Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home 248 | $ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/ 249 | 250 | # Download entire series season keeping each series and each season in separate directory under C:/MyVideos 251 | $ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617 252 | 253 | # Stream the video being downloaded to stdout 254 | $ youtube-dl -o - BaW_jenozKc 255 | ``` 256 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist= 7 | 8 | # Add files or directories to the blacklist. They should be base names, not 9 | # paths. 10 | ignore=CVS 11 | 12 | # Add files or directories matching the regex patterns to the blacklist. The 13 | # regex matches against base names, not paths. 14 | ignore-patterns= 15 | 16 | # Python code to execute, usually for sys.path manipulation such as 17 | # pygtk.require(). 18 | #init-hook= 19 | 20 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 21 | # number of processors available to use. 22 | jobs=0 23 | 24 | # Control the amount of potential inferred values when inferring a single 25 | # object. This can help the performance when dealing with large functions or 26 | # complex, nested conditions. 27 | limit-inference-results=100 28 | 29 | # List of plugins (as comma separated values of python modules names) to load, 30 | # usually to register additional checkers. 31 | load-plugins= 32 | 33 | # Pickle collected data for later comparisons. 34 | persistent=yes 35 | 36 | # Specify a configuration file. 37 | #rcfile= 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 63 | disable=invalid-name, 64 | missing-docstring, 65 | too-many-lines, 66 | print-statement, 67 | parameter-unpacking, 68 | unpacking-in-except, 69 | old-raise-syntax, 70 | backtick, 71 | long-suffix, 72 | old-ne-operator, 73 | old-octal-literal, 74 | import-star-module-level, 75 | non-ascii-bytes-literal, 76 | raw-checker-failed, 77 | bad-inline-option, 78 | locally-disabled, 79 | file-ignored, 80 | suppressed-message, 81 | useless-suppression, 82 | deprecated-pragma, 83 | use-symbolic-message-instead, 84 | too-many-instance-attributes, 85 | too-many-public-methods, 86 | too-many-branches, 87 | too-many-arguments, 88 | too-many-locals, 89 | too-many-statements, 90 | no-else-return, 91 | no-else-raise, 92 | unnecessary-pass, 93 | wildcard-import, 94 | unused-wildcard-import, 95 | apply-builtin, 96 | basestring-builtin, 97 | buffer-builtin, 98 | cmp-builtin, 99 | coerce-builtin, 100 | execfile-builtin, 101 | file-builtin, 102 | long-builtin, 103 | raw_input-builtin, 104 | reduce-builtin, 105 | standarderror-builtin, 106 | unicode-builtin, 107 | xrange-builtin, 108 | coerce-method, 109 | delslice-method, 110 | getslice-method, 111 | setslice-method, 112 | no-absolute-import, 113 | old-division, 114 | dict-iter-method, 115 | dict-view-method, 116 | next-method-called, 117 | metaclass-assignment, 118 | indexing-exception, 119 | raising-string, 120 | reload-builtin, 121 | oct-method, 122 | hex-method, 123 | nonzero-method, 124 | cmp-method, 125 | input-builtin, 126 | round-builtin, 127 | intern-builtin, 128 | unichr-builtin, 129 | map-builtin-not-iterating, 130 | zip-builtin-not-iterating, 131 | range-builtin-not-iterating, 132 | filter-builtin-not-iterating, 133 | using-cmp-argument, 134 | eq-without-hash, 135 | div-method, 136 | idiv-method, 137 | rdiv-method, 138 | exception-message-attribute, 139 | invalid-str-codec, 140 | sys-max-int, 141 | bad-python3-import, 142 | deprecated-string-function, 143 | deprecated-str-translate-call, 144 | deprecated-itertools-function, 145 | deprecated-types-field, 146 | next-method-defined, 147 | dict-items-not-iterating, 148 | dict-keys-not-iterating, 149 | dict-values-not-iterating, 150 | deprecated-operator-function, 151 | deprecated-urllib-function, 152 | xreadlines-attribute, 153 | deprecated-sys-function, 154 | exception-escape, 155 | comprehension-escape 156 | 157 | # Enable the message, report, category or checker with the given id(s). You can 158 | # either give multiple identifier separated by comma (,) or put this option 159 | # multiple time (only on the command line, not in the configuration file where 160 | # it should appear only once). See also the "--disable" option for examples. 161 | enable=c-extension-no-member 162 | 163 | 164 | [REPORTS] 165 | 166 | # Python expression which should return a note less than 10 (10 is the highest 167 | # note). You have access to the variables errors warning, statement which 168 | # respectively contain the number of errors / warnings messages and the total 169 | # number of statements analyzed. This is used by the global evaluation report 170 | # (RP0004). 171 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 172 | 173 | # Template used to display messages. This is a python new-style format string 174 | # used to format the message information. See doc for all details. 175 | #msg-template= 176 | 177 | # Set the output format. Available formats are text, parseable, colorized, json 178 | # and msvs (visual studio). You can also give a reporter class, e.g. 179 | # mypackage.mymodule.MyReporterClass. 180 | output-format=text 181 | 182 | # Tells whether to display a full report or only the messages. 183 | reports=no 184 | 185 | # Activate the evaluation score. 186 | score=yes 187 | 188 | 189 | [REFACTORING] 190 | 191 | # Maximum number of nested blocks for function / method body 192 | max-nested-blocks=5 193 | 194 | # Complete name of functions that never returns. When checking for 195 | # inconsistent-return-statements if a never returning function is called then 196 | # it will be considered as an explicit return statement and no message will be 197 | # printed. 198 | never-returning-functions=sys.exit 199 | 200 | 201 | [VARIABLES] 202 | 203 | # List of additional names supposed to be defined in builtins. Remember that 204 | # you should avoid defining new builtins when possible. 205 | additional-builtins= 206 | 207 | # Tells whether unused global variables should be treated as a violation. 208 | allow-global-unused-variables=yes 209 | 210 | # List of strings which can identify a callback function by name. A callback 211 | # name must start or end with one of those strings. 212 | callbacks=cb_, 213 | _cb 214 | 215 | # A regular expression matching the name of dummy variables (i.e. expected to 216 | # not be used). 217 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 218 | 219 | # Argument names that match this expression will be ignored. Default to name 220 | # with leading underscore. 221 | ignored-argument-names=_.*|^ignored_|^unused_ 222 | 223 | # Tells whether we should check for unused import in __init__ files. 224 | init-import=no 225 | 226 | # List of qualified module names which can have objects that can redefine 227 | # builtins. 228 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 229 | 230 | 231 | [TYPECHECK] 232 | 233 | # List of decorators that produce context managers, such as 234 | # contextlib.contextmanager. Add to this list to register other decorators that 235 | # produce valid context managers. 236 | contextmanager-decorators=contextlib.contextmanager 237 | 238 | # List of members which are set dynamically and missed by pylint inference 239 | # system, and so shouldn't trigger E1101 when accessed. Python regular 240 | # expressions are accepted. 241 | generated-members= 242 | 243 | # Tells whether missing members accessed in mixin class should be ignored. A 244 | # mixin class is detected if its name ends with "mixin" (case insensitive). 245 | ignore-mixin-members=yes 246 | 247 | # Tells whether to warn about missing members when the owner of the attribute 248 | # is inferred to be None. 249 | ignore-none=yes 250 | 251 | # This flag controls whether pylint should warn about no-member and similar 252 | # checks whenever an opaque object is returned when inferring. The inference 253 | # can return multiple potential results while evaluating a Python object, but 254 | # some branches might not be evaluated, which results in partial inference. In 255 | # that case, it might be useful to still emit no-member and other checks for 256 | # the rest of the inferred objects. 257 | ignore-on-opaque-inference=yes 258 | 259 | # List of class names for which member attributes should not be checked (useful 260 | # for classes with dynamically set attributes). This supports the use of 261 | # qualified names. 262 | ignored-classes=optparse.Values,thread._local,_thread._local 263 | 264 | # List of module names for which member attributes should not be checked 265 | # (useful for modules/projects where namespaces are manipulated during runtime 266 | # and thus existing member attributes cannot be deduced by static analysis. It 267 | # supports qualified module names, as well as Unix pattern matching. 268 | ignored-modules= 269 | 270 | # Show a hint with possible names when a member name was not found. The aspect 271 | # of finding the hint is based on edit distance. 272 | missing-member-hint=yes 273 | 274 | # The minimum edit distance a name should have in order to be considered a 275 | # similar match for a missing member name. 276 | missing-member-hint-distance=1 277 | 278 | # The total number of similar names that should be taken in consideration when 279 | # showing a hint for a missing member. 280 | missing-member-max-choices=1 281 | 282 | 283 | [STRING] 284 | 285 | # This flag controls whether the implicit-str-concat-in-sequence should 286 | # generate a warning on implicit string concatenation in sequences defined over 287 | # several lines. 288 | check-str-concat-over-line-jumps=no 289 | 290 | 291 | [SPELLING] 292 | 293 | # Limits count of emitted suggestions for spelling mistakes. 294 | max-spelling-suggestions=4 295 | 296 | # Spelling dictionary name. Available dictionaries: none. To make it working 297 | # install python-enchant package.. 298 | spelling-dict= 299 | 300 | # List of comma separated words that should not be checked. 301 | spelling-ignore-words= 302 | 303 | # A path to a file that contains private dictionary; one word per line. 304 | spelling-private-dict-file= 305 | 306 | # Tells whether to store unknown words to indicated private dictionary in 307 | # --spelling-private-dict-file option instead of raising a message. 308 | spelling-store-unknown-words=no 309 | 310 | 311 | [SIMILARITIES] 312 | 313 | # Ignore comments when computing similarities. 314 | ignore-comments=yes 315 | 316 | # Ignore docstrings when computing similarities. 317 | ignore-docstrings=yes 318 | 319 | # Ignore imports when computing similarities. 320 | ignore-imports=no 321 | 322 | # Minimum lines number of a similarity. 323 | min-similarity-lines=4 324 | 325 | 326 | [MISCELLANEOUS] 327 | 328 | # List of note tags to take in consideration, separated by a comma. 329 | notes=FIXME, 330 | XXX, 331 | TODO 332 | 333 | 334 | [LOGGING] 335 | 336 | # Format style used to check logging format string. `old` means using % 337 | # formatting, while `new` is for `{}` formatting. 338 | logging-format-style=old 339 | 340 | # Logging modules to check that the string format arguments are in logging 341 | # function parameter format. 342 | logging-modules=logging 343 | 344 | 345 | [FORMAT] 346 | 347 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 348 | expected-line-ending-format= 349 | 350 | # Regexp for a line that is allowed to be longer than the limit. 351 | ignore-long-lines=^\s*(# )??$ 352 | 353 | # Number of spaces of indent required inside a hanging or continued line. 354 | #indent-after-paren=4 355 | 356 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 357 | # tab). 358 | indent-string=' ' 359 | 360 | # Maximum number of characters on a single line. 361 | max-line-length=120 362 | 363 | # Maximum number of lines in a module. 364 | max-module-lines=1000 365 | 366 | # List of optional constructs for which whitespace checking is disabled. `dict- 367 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 368 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 369 | # `empty-line` allows space-only lines. 370 | no-space-check=trailing-comma, 371 | dict-separator 372 | 373 | # Allow the body of a class to be on the same line as the declaration if body 374 | # contains single statement. 375 | single-line-class-stmt=no 376 | 377 | # Allow the body of an if to be on the same line as the test if there is no 378 | # else. 379 | single-line-if-stmt=no 380 | 381 | 382 | [BASIC] 383 | 384 | # Naming style matching correct argument names. 385 | argument-naming-style=snake_case 386 | 387 | # Regular expression matching correct argument names. Overrides argument- 388 | # naming-style. 389 | #argument-rgx= 390 | 391 | # Naming style matching correct attribute names. 392 | attr-naming-style=snake_case 393 | 394 | # Regular expression matching correct attribute names. Overrides attr-naming- 395 | # style. 396 | #attr-rgx= 397 | 398 | # Bad variable names which should always be refused, separated by a comma. 399 | bad-names=foo, 400 | bar, 401 | baz, 402 | toto, 403 | tutu, 404 | tata 405 | 406 | # Naming style matching correct class attribute names. 407 | class-attribute-naming-style=any 408 | 409 | # Regular expression matching correct class attribute names. Overrides class- 410 | # attribute-naming-style. 411 | #class-attribute-rgx= 412 | 413 | # Naming style matching correct class names. 414 | class-naming-style=PascalCase 415 | 416 | # Regular expression matching correct class names. Overrides class-naming- 417 | # style. 418 | #class-rgx= 419 | 420 | # Naming style matching correct constant names. 421 | const-naming-style=UPPER_CASE 422 | 423 | # Regular expression matching correct constant names. Overrides const-naming- 424 | # style. 425 | #const-rgx= 426 | 427 | # Minimum line length for functions/classes that require docstrings, shorter 428 | # ones are exempt. 429 | docstring-min-length=-1 430 | 431 | # Naming style matching correct function names. 432 | function-naming-style=snake_case 433 | 434 | # Regular expression matching correct function names. Overrides function- 435 | # naming-style. 436 | #function-rgx= 437 | 438 | # Good variable names which should always be accepted, separated by a comma. 439 | good-names=i, 440 | j, 441 | k, 442 | ex, 443 | Run, 444 | _ 445 | 446 | # Include a hint for the correct naming format with invalid-name. 447 | include-naming-hint=no 448 | 449 | # Naming style matching correct inline iteration names. 450 | inlinevar-naming-style=any 451 | 452 | # Regular expression matching correct inline iteration names. Overrides 453 | # inlinevar-naming-style. 454 | #inlinevar-rgx= 455 | 456 | # Naming style matching correct method names. 457 | method-naming-style=snake_case 458 | 459 | # Regular expression matching correct method names. Overrides method-naming- 460 | # style. 461 | #method-rgx= 462 | 463 | # Naming style matching correct module names. 464 | module-naming-style=snake_case 465 | 466 | # Regular expression matching correct module names. Overrides module-naming- 467 | # style. 468 | #module-rgx= 469 | 470 | # Colon-delimited sets of names that determine each other's naming style when 471 | # the name regexes allow several styles. 472 | name-group= 473 | 474 | # Regular expression which should only match function or class names that do 475 | # not require a docstring. 476 | no-docstring-rgx=^_ 477 | 478 | # List of decorators that produce properties, such as abc.abstractproperty. Add 479 | # to this list to register other decorators that produce valid properties. 480 | # These decorators are taken in consideration only for invalid-name. 481 | property-classes=abc.abstractproperty 482 | 483 | # Naming style matching correct variable names. 484 | variable-naming-style=snake_case 485 | 486 | # Regular expression matching correct variable names. Overrides variable- 487 | # naming-style. 488 | #variable-rgx= 489 | 490 | 491 | [IMPORTS] 492 | 493 | # Allow wildcard imports from modules that define __all__. 494 | allow-wildcard-with-all=no 495 | 496 | # Analyse import fallback blocks. This can be used to support both Python 2 and 497 | # 3 compatible code, which means that the block might have code that exists 498 | # only in one or another interpreter, leading to false positives when analysed. 499 | analyse-fallback-blocks=no 500 | 501 | # Deprecated modules which should not be used, separated by a comma. 502 | deprecated-modules=optparse,tkinter.tix 503 | 504 | # Create a graph of external dependencies in the given file (report RP0402 must 505 | # not be disabled). 506 | ext-import-graph= 507 | 508 | # Create a graph of every (i.e. internal and external) dependencies in the 509 | # given file (report RP0402 must not be disabled). 510 | import-graph= 511 | 512 | # Create a graph of internal dependencies in the given file (report RP0402 must 513 | # not be disabled). 514 | int-import-graph= 515 | 516 | # Force import order to recognize a module as part of the standard 517 | # compatibility libraries. 518 | known-standard-library= 519 | 520 | # Force import order to recognize a module as part of a third party library. 521 | known-third-party=enchant 522 | 523 | 524 | [DESIGN] 525 | 526 | # Maximum number of arguments for function / method. 527 | max-args=5 528 | 529 | # Maximum number of attributes for a class (see R0902). 530 | max-attributes=7 531 | 532 | # Maximum number of boolean expressions in an if statement. 533 | max-bool-expr=5 534 | 535 | # Maximum number of branch for function / method body. 536 | max-branches=12 537 | 538 | # Maximum number of locals for function / method body. 539 | max-locals=15 540 | 541 | # Maximum number of parents for a class (see R0901). 542 | max-parents=7 543 | 544 | # Maximum number of public methods for a class (see R0904). 545 | max-public-methods=20 546 | 547 | # Maximum number of return / yield for function / method body. 548 | max-returns=6 549 | 550 | # Maximum number of statements in function / method body. 551 | max-statements=50 552 | 553 | # Minimum number of public methods for a class (see R0903). 554 | min-public-methods=2 555 | 556 | 557 | [CLASSES] 558 | 559 | # List of method names used to declare (i.e. assign) instance attributes. 560 | defining-attr-methods=__init__, 561 | __new__, 562 | setUp 563 | 564 | # List of member names, which should be excluded from the protected access 565 | # warning. 566 | exclude-protected=_asdict, 567 | _fields, 568 | _replace, 569 | _source, 570 | _make 571 | 572 | # List of valid names for the first argument in a class method. 573 | valid-classmethod-first-arg=cls 574 | 575 | # List of valid names for the first argument in a metaclass class method. 576 | valid-metaclass-classmethod-first-arg=cls 577 | 578 | 579 | [EXCEPTIONS] 580 | 581 | # Exceptions that will emit a warning when being caught. Defaults to 582 | # "BaseException, Exception". 583 | overgeneral-exceptions=BaseException, 584 | Exception 585 | --------------------------------------------------------------------------------