├── ytsearch ├── __version__.py ├── __init__.py ├── constants.py ├── autocomplete.py ├── parser │ ├── horizontalcard.py │ ├── utils │ │ └── __init__.py │ ├── show.py │ ├── channel.py │ ├── playlist.py │ ├── video.py │ └── __init__.py ├── filters │ └── __init__.py └── full_search.py ├── setup.py ├── .gitignore └── readme.md /ytsearch/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "2022.10.25.0" 2 | -------------------------------------------------------------------------------- /ytsearch/__init__.py: -------------------------------------------------------------------------------- 1 | from . import filters 2 | from .autocomplete import autocomplete 3 | from .full_search import search 4 | -------------------------------------------------------------------------------- /ytsearch/constants.py: -------------------------------------------------------------------------------- 1 | YOUTUBE_BASE_URL = "https://www.youtube.com" 2 | YOUTUBE_SEARCH_URL = YOUTUBE_BASE_URL + "/results" 3 | 4 | PUBLIC_YOUTUBE_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" 5 | YOUTUBE_SEARCH_API = YOUTUBE_BASE_URL + "/youtubei/v1/search" 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | from ytsearch.__version__ import __version__ 4 | 5 | setup( 6 | name="fast-yt-search", 7 | version=__version__, 8 | author="kr@justfoolingaround", 9 | author_email="kr.justfoolingaround@gmail.com", 10 | description="The most powerful and fastest YouTube searching Python library.", 11 | packages=find_packages(), 12 | url="https://github.com/justfoolingaround/fast-yt-search", 13 | ) 14 | -------------------------------------------------------------------------------- /ytsearch/autocomplete.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | SUGGEST_QUERIES_ENDPOINT = "https://suggestqueries-clients6.youtube.com/complete/search" 4 | 5 | 6 | def autocomplete( 7 | session, 8 | incomplete_query, 9 | *, 10 | client_name="youtube", 11 | home_language="en", 12 | video_id=None 13 | ): 14 | query = { 15 | "q": incomplete_query, 16 | "hl": home_language, 17 | "client": client_name, 18 | "callback": ".", 19 | } 20 | 21 | if video_id: 22 | query.update({"video_id": video_id}) 23 | 24 | for __ in json.loads( 25 | session.get(SUGGEST_QUERIES_ENDPOINT, params=query).text[7:-1] 26 | )[1:-1]: 27 | for result, _, _ in __: 28 | yield result 29 | -------------------------------------------------------------------------------- /ytsearch/parser/horizontalcard.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from .utils import bump_image, get_text 4 | 5 | 6 | @dataclass 7 | class HorizontalCard: 8 | query: str 9 | thumbnail: str = None 10 | 11 | 12 | def from_horizontalcard_renderer(data: dict): 13 | def genexp(): 14 | for card in data["cards"]: 15 | 16 | renderer = card["searchRefinementCardRenderer"] 17 | 18 | component = {} 19 | 20 | thumbnails = [ 21 | _["url"] for _ in renderer.get("thumbnail", {}).get("thumbnails", []) 22 | ] 23 | 24 | if thumbnails: 25 | selected = thumbnails[-1] 26 | 27 | if selected[-13:] == "mqdefault.jpg": 28 | component.update(thumbnail=selected) 29 | else: 30 | component.update(thumbnail=bump_image(thumbnails[-1])) 31 | 32 | component.update( 33 | query=get_text(renderer["query"]), 34 | ) 35 | 36 | yield component 37 | 38 | return list(HorizontalCard(**_) for _ in genexp()) 39 | -------------------------------------------------------------------------------- /ytsearch/parser/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | GOOGLE_PHOTOS_URL_RE = re.compile(r"^(?:(?:https?:)?//)(.+?\.ggpht\.com/.+?)=(.+)$") 4 | YOUTUBE_THUMBNAIL_URL_RE = re.compile( 5 | r"^(?:(?:https?:)?//)(?:.+?\.ytimg|img\.youtube)\.com/v[i0-9]/(.+?)/.+?\.[^&?/]+$" 6 | ) 7 | 8 | 9 | def get_text( 10 | component: dict, *, runs_joiner=", ", run_accessor=lambda run: run["text"] 11 | ): 12 | 13 | if "runs" in component: 14 | return runs_joiner.join(run_accessor(run) for run in component["runs"]) 15 | 16 | if "simpleText" in component: 17 | return component["simpleText"] 18 | 19 | return None 20 | 21 | 22 | def get_maxres_video_thumbnail(video_id: str): 23 | return f"https://i.ytimg.com/vi/{video_id}/maxresdefault.jpg" 24 | 25 | 26 | def bump_image(image, *, size=10000): 27 | 28 | google_photos_match = GOOGLE_PHOTOS_URL_RE.match(image) 29 | 30 | if google_photos_match: 31 | return f"https://{google_photos_match.group(1)}=s{size}" 32 | 33 | youtube_thumbnail_match = YOUTUBE_THUMBNAIL_URL_RE.match(image) 34 | 35 | if youtube_thumbnail_match: 36 | return get_maxres_video_thumbnail(youtube_thumbnail_match.group(1)) 37 | 38 | return image 39 | -------------------------------------------------------------------------------- /ytsearch/parser/show.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from .channel import Channel 4 | from .utils import get_text 5 | 6 | 7 | @dataclass 8 | class Show: 9 | id: str 10 | title: str 11 | channel: Channel = None 12 | thumbnail: str = None 13 | 14 | 15 | def from_show_renderer(data: dict): 16 | 17 | component = { 18 | "title": get_text(data["title"]), 19 | } 20 | 21 | owner_runs = data.get("longBylineText", {}).get("runs", []) 22 | 23 | if owner_runs: 24 | channel = owner_runs[0] 25 | component["channel"] = Channel( 26 | channel["navigationEndpoint"]["browseEndpoint"]["browseId"], 27 | channel["text"], 28 | ) 29 | 30 | if "naviagtionEndpoint" in data: 31 | component["id"] = data["navigationEndpoint"]["browseEndpoint"]["browseId"] 32 | 33 | thumbnails = ( 34 | data.get("thumbnailRenderer", {}) 35 | .get("showCustomThumbnailRenderer", {}) 36 | .get("thumbnail", {}) 37 | .get("thumbnails", []) 38 | ) 39 | 40 | if thumbnails: 41 | highest_quality = thumbnails[-1] 42 | 43 | component.update(thumbnail=highest_quality["url"]) 44 | 45 | return Show(**component) 46 | -------------------------------------------------------------------------------- /ytsearch/parser/channel.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from .utils import bump_image, get_text 4 | 5 | 6 | @dataclass 7 | class Channel: 8 | id: str 9 | name: str 10 | video_count: str = None 11 | subscriber_count: str = None 12 | description: str = None 13 | badges: list = None 14 | thumbnail: str = None 15 | 16 | 17 | def from_channel_renderer(data: dict): 18 | 19 | component = { 20 | "id": data["channelId"], 21 | "name": get_text(data["title"]), 22 | } 23 | 24 | if "videoCountText" in data: 25 | component["video_count"] = get_text(data["videoCountText"], runs_joiner=" ") 26 | 27 | if "subscriberCountText" in data: 28 | component["subscriber_count"] = get_text(data["subscriberCountText"]) 29 | 30 | if "descriptionSnippet" in data: 31 | component["description"] = get_text(data["descriptionSnippet"]) 32 | 33 | if "ownerBadges" in data: 34 | component["badges"] = [ 35 | _["metadataBadgeRenderer"]["style"] for _ in data["ownerBadges"] 36 | ] 37 | 38 | thumbnails = [_["url"] for _ in data["thumbnail"]["thumbnails"]] 39 | 40 | if thumbnails: 41 | component.update(thumbnail=bump_image(thumbnails[0])) 42 | 43 | return Channel(**component) 44 | -------------------------------------------------------------------------------- /ytsearch/filters/__init__.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import enum 3 | 4 | NO_AUTOCORRECT_KEY = b"\x42\x02\x08\x01" 5 | 6 | 7 | class SortBy(enum.Enum): 8 | 9 | RELEVANCE = b"" 10 | UPLOAD_DATE = b"\x08\x02" 11 | VIEW_COUNT = b"\x08\x03" 12 | RATING = b"\x08\x01" 13 | 14 | 15 | class Feature(enum.Enum): 16 | 17 | LIVE = b"\x40" 18 | Q_4K = b"\x70" 19 | Q_HD = b"\x20" 20 | SUBTITLES = b"\x28" 21 | CREATIVE_COMMONS = b"\x30" 22 | Q_360 = b"\x78" 23 | Q_VR180 = b"\xd0" 24 | Q_3D = b"\x38" 25 | Q_HDR = b"\xc8" 26 | LOCATION = b"\xb8" 27 | PURCHASED = b"\x48" 28 | 29 | 30 | class Duration(enum.Enum): 31 | 32 | LESS_THAN_4 = b"\x01" 33 | BETWEEN_4_20 = b"\x03" 34 | MORE_THAN_20 = b"\x02" 35 | 36 | 37 | class ContentType(enum.Enum): 38 | 39 | VIDEO = b"\x01" 40 | CHANNEL = b"\x02" 41 | PLAYLIST = b"\x03" 42 | MOVIE = b"\x04" 43 | 44 | 45 | class UploadTime(enum.Enum): 46 | 47 | LAST_HOUR = b"\x01" 48 | TODAY = b"\x02" 49 | THIS_WEEK = b"\x03" 50 | THIS_MONTH = b"\x04" 51 | THIS_YEAR = b"\x05" 52 | 53 | 54 | def get_filter_key( 55 | sort_by: SortBy = SortBy.RELEVANCE, 56 | features: "tuple[Feature]" = (), 57 | duration: "Duration | None" = None, 58 | content_type: "ContentType | None" = None, 59 | upload_time: "UploadTime | None" = None, 60 | *, 61 | autocorrect: bool = False 62 | ): 63 | 64 | initial = b"" 65 | 66 | if upload_time is not None: 67 | initial += b"\x08" + upload_time.value 68 | 69 | if content_type is not None: 70 | initial += b"\x10" + content_type.value 71 | 72 | if duration is not None: 73 | initial += b"\x18" + duration.value 74 | 75 | for features in features: 76 | initial += features.value + b"\x01" 77 | 78 | retval = sort_by.value 79 | 80 | if initial: 81 | retval += b"\x12" + bytes((len(initial),)) + initial 82 | 83 | if not autocorrect: 84 | retval += NO_AUTOCORRECT_KEY 85 | 86 | return base64.b64encode(retval).decode() 87 | -------------------------------------------------------------------------------- /ytsearch/parser/playlist.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from .channel import Channel 4 | from .utils import bump_image, get_maxres_video_thumbnail, get_text 5 | from .video import Video 6 | 7 | 8 | @dataclass 9 | class Playlist: 10 | id: str 11 | title: str 12 | video_count: str = None 13 | relative_upload_time: str = None 14 | channel: Channel = None 15 | videos: list = None 16 | is_live: bool = False 17 | duration: str = None 18 | thumbnail: str = None 19 | 20 | 21 | def from_playlist_renderer(data: dict): 22 | 23 | component = { 24 | "id": data["playlistId"], 25 | "title": get_text(data["title"]), 26 | } 27 | 28 | if "videoCountText" in data: 29 | component["video_count"] = get_text(data["videoCountText"], runs_joiner=" ") 30 | 31 | if "publishedTimeText" in data: 32 | component["relative_upload_time"] = get_text(data["publishedTimeText"]) 33 | 34 | owner_runs = data.get("longBylineText", {}).get("runs", []) 35 | 36 | if owner_runs: 37 | channel = owner_runs[0] 38 | component["channel"] = Channel( 39 | channel["navigationEndpoint"]["browseEndpoint"]["browseId"], 40 | channel["text"], 41 | ) 42 | 43 | def genexp(): 44 | for video in data.get("videos"): 45 | video_component = video["childVideoRenderer"] 46 | 47 | internal_component = { 48 | "title": get_text(video_component["title"]), 49 | "id": video_component["videoId"], 50 | "thumbnail": get_maxres_video_thumbnail(video_component["videoId"]), 51 | } 52 | 53 | is_live = not ("lengthText" in video_component) 54 | internal_component.update(is_live=is_live) 55 | 56 | if not is_live: 57 | internal_component["duration"] = get_text(video_component["lengthText"]) 58 | 59 | yield Video(**internal_component) 60 | 61 | component["videos"] = list(genexp()) 62 | 63 | return Playlist(**component) 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # justfoolingaround's devtests 132 | devtest.py -------------------------------------------------------------------------------- /ytsearch/parser/video.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | from .channel import Channel 4 | from .utils import bump_image, get_maxres_video_thumbnail, get_text 5 | 6 | 7 | @dataclass 8 | class Video: 9 | id: str 10 | title: str 11 | thumbnail: str 12 | channel: Channel = None 13 | 14 | views: str = None 15 | duration: str = None 16 | is_live: bool = False 17 | short_description: str = None 18 | rich_thumbnail: list = None 19 | badges: list = None 20 | relative_upload_time: str = None 21 | 22 | 23 | def from_video_renderer(data: dict): 24 | 25 | component = { 26 | "id": data["videoId"], 27 | "thumbnail": get_maxres_video_thumbnail(data["videoId"]), 28 | "title": get_text(data["title"]), 29 | } 30 | 31 | if "viewCountText" in data: 32 | component["views"] = get_text(data["viewCountText"], runs_joiner=" ") 33 | 34 | if "richThumbnail" in data: 35 | component["rich_thumbnail"] = [ 36 | _["url"] 37 | for _ in data["richThumbnail"]["movingThumbnailRenderer"][ 38 | "movingThumbnailDetails" 39 | ]["thumbnails"] 40 | ] 41 | 42 | if "badges" in data: 43 | component["badges"] = [ 44 | _["metadataBadgeRenderer"]["style"] for _ in data["badges"] 45 | ] 46 | 47 | if "publishedTimeText" in data: 48 | component["relative_upload_time"] = get_text(data["publishedTimeText"]) 49 | 50 | if "detailedMetadataSnippets" in data: 51 | component["short_description"] = "\n".join( 52 | get_text(_["snippetText"]) for _ in data["detailedMetadataSnippets"] 53 | ) 54 | 55 | is_live = not ("lengthText" in data) 56 | component.update(is_live=is_live) 57 | 58 | if not is_live: 59 | component["duration"] = get_text(data["lengthText"]) 60 | 61 | if "ownerText" in data: 62 | channel_info_raw = data["ownerText"] 63 | 64 | channel_info = { 65 | "id": get_text( 66 | channel_info_raw, 67 | run_accessor=lambda run: run["navigationEndpoint"]["browseEndpoint"][ 68 | "browseId" 69 | ], 70 | ), 71 | "name": get_text(channel_info_raw, run_accessor=lambda run: run["text"]), 72 | } 73 | 74 | if "ownerBadges" in data: 75 | channel_info["badges"] = [ 76 | _["metadataBadgeRenderer"]["style"] for _ in data["ownerBadges"] 77 | ] 78 | 79 | thumbnails = [ 80 | _["url"] 81 | for _ in data["channelThumbnailSupportedRenderers"][ 82 | "channelThumbnailWithLinkRenderer" 83 | ]["thumbnail"]["thumbnails"] 84 | ] 85 | 86 | if thumbnails: 87 | channel_info.update(thumbnail=bump_image(thumbnails[0])) 88 | 89 | component.update(channel=Channel(**channel_info)) 90 | 91 | return Video(**component) 92 | -------------------------------------------------------------------------------- /ytsearch/parser/__init__.py: -------------------------------------------------------------------------------- 1 | from .channel import from_channel_renderer 2 | from .horizontalcard import from_horizontalcard_renderer 3 | from .playlist import from_playlist_renderer 4 | from .show import from_show_renderer 5 | from .utils import get_text 6 | from .video import from_video_renderer 7 | 8 | 9 | def iter_from_item_section_renderer( 10 | data: dict, *, attrs=None, accessor=lambda data: data["contents"] 11 | ): 12 | 13 | if attrs is None: 14 | attrs = {} 15 | 16 | def genexp(attrs=attrs): 17 | 18 | for renderer_shelf in accessor(data): 19 | 20 | if "radioRenderer" in renderer_shelf: 21 | yield { 22 | "type": "radio", 23 | "content": from_playlist_renderer(renderer_shelf["radioRenderer"]), 24 | } 25 | 26 | if "horizontalCardListRenderer" in renderer_shelf: 27 | yield { 28 | "type": "similar_channels", 29 | "content": from_horizontalcard_renderer( 30 | renderer_shelf["horizontalCardListRenderer"] 31 | ), 32 | } 33 | 34 | if "shelfRenderer" in renderer_shelf: 35 | 36 | shadow_attrs = { 37 | "media_group": get_text(renderer_shelf["shelfRenderer"]["title"]), 38 | } 39 | 40 | shadow_attrs.update(attrs) 41 | yield from iter_from_item_section_renderer( 42 | renderer_shelf["shelfRenderer"]["content"]["verticalListRenderer"], 43 | attrs=shadow_attrs, 44 | accessor=lambda data: data["items"], 45 | ) 46 | 47 | if "showingResultsForRenderer" in renderer_shelf: 48 | attrs.update( 49 | { 50 | "showing_results_for": get_text( 51 | renderer_shelf["showingResultsForRenderer"][ 52 | "correctedQuery" 53 | ], 54 | runs_joiner="", 55 | ) 56 | } 57 | ) 58 | 59 | if "didYouMeanRenderer" in renderer_shelf: 60 | attrs.update( 61 | { 62 | "did_you_mean": get_text( 63 | renderer_shelf["didYouMeanRenderer"]["correctedQuery"], 64 | runs_joiner="", 65 | ), 66 | } 67 | ) 68 | 69 | if "videoRenderer" in renderer_shelf: 70 | yield { 71 | "type": "video", 72 | "content": from_video_renderer(renderer_shelf["videoRenderer"]), 73 | } 74 | 75 | if "channelRenderer" in renderer_shelf: 76 | yield { 77 | "type": "channel", 78 | "content": from_channel_renderer(renderer_shelf["channelRenderer"]), 79 | } 80 | 81 | if "playlistRenderer" in renderer_shelf: 82 | yield { 83 | "type": "playlist", 84 | "content": from_playlist_renderer( 85 | renderer_shelf["playlistRenderer"] 86 | ), 87 | } 88 | 89 | if "showRenderer" in renderer_shelf: 90 | yield { 91 | "type": "show", 92 | "content": from_show_renderer(renderer_shelf["showRenderer"]), 93 | } 94 | 95 | for component in genexp(): 96 | component.setdefault("attrs", {}).update(attrs) 97 | yield component 98 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | ## Overview 4 | 5 | - [Installation](#installation) 6 | 1. [PIP Installation](#pip-installation) 7 | 2. [Source Code Download](#source-code-download) 8 | - [Usage](#usage) 9 | - [Core Features](#core-features) 10 | - [Functioning](#functioning) 11 | 12 | ## Installation 13 | 14 | This project can be installed on to your device via different mechanisms, these mechanisms are listed below in the order of ease. 15 | 16 |
    17 | 18 |
  1. PIP Installs Packages aka PIP Installation 19 | 20 | $ pip install git+https://www.github.com/justfoolingaround/fast-yt-search 21 |
  2. 22 |
  3. Source Code Download 23 | 24 | $ git clone https://www.github.com/justfoolingaround/fast-yt-search 25 | 26 | Given that you have [`git`](https://git-scm.com/) installed, you can clone the repository from GitHub. If you do not have or want to deal with installation of [`git`](https://git-scm.com/), you can simply download the repository using [this link.](https://github.com/justfoolingaround/fast-yt-search/archive/refs/heads/master.zip) 27 | 28 | After the repository is downloaded and placed in an appropriate directory, you can use [`setup.py`](./setup.py) to proceed with the installation. 29 | 30 | $ pip install . 31 |
  4. 32 |
33 | This command is to be executed from the directory where the repository is located. 34 | 35 | **Additional information:** You **must** have Python installed **and** in PATH to use this project properly. Your Python executable may be `py` **or** `python` **or** `python3`. **Only Python 3.6 and higher versions are supported by the project.** 36 | 37 | ## Usage 38 | 39 | ```py 40 | import httpx # requests is also supported, httpx is better. 41 | 42 | import ytsearch 43 | 44 | headers = { 45 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36" 46 | } 47 | 48 | # We're going to be using a PC user-agent to get full functionality. 49 | 50 | client = httpx.Client(headers=headers) 51 | 52 | for estimated_results, component in ytsearch.search(client, "Rick Astley - Never Gonna Give You Up"): 53 | print(component) 54 | 55 | # This is going to get the first page, now for the fully filtered **all** results. 56 | 57 | for estimated_results, component in ytsearch.search(client, "Rick Astley - Never Gonna Give You Up", content_type=ytsearch.filters.ContentType.VIDEO, keep_searching=True): 58 | print(component) 59 | ``` 60 | 61 | ## Core Features 62 | 63 | - Devastatingly powerful, can search **any** YouTube content limitlessly and effortlessly. 64 | - Supports search filters, and you can pile them up. 65 | - Easy API reference, just import n' use. 66 | - **No external dependency required for installation.** Session clients of libraries similar to `python-requests` or `httpx` will work. 67 | - User controlled API client context and user-agent. 68 | - Gives you whatever you see based on your settings. 69 | - For PC user agents, it will give you those moving thumbnails, namely `rich thumbnails`. 70 | - High efficiency code. 71 | - Supports autocomplete, with the option to set with a video context. 72 | - Gives the highest possible images for thumbnails and channel profile images. 73 | 74 | ## Functioning 75 | 76 | This project functions similarly to your YouTube client, you give it a query, it'll run a search. If you scroll down, it'll give you the illusion of infinite scroll. 77 | 78 | Except, this project is faster. It doesn't load anything other than what is required. 79 | -------------------------------------------------------------------------------- /ytsearch/full_search.py: -------------------------------------------------------------------------------- 1 | from . import filters 2 | from .constants import PUBLIC_YOUTUBE_API_KEY, YOUTUBE_SEARCH_API, YOUTUBE_SEARCH_URL 3 | from .parser import iter_from_item_section_renderer 4 | 5 | 6 | def fetch_data(data: dict, *, initial=False): 7 | 8 | if initial: 9 | medias = ( 10 | data.get("contents", {}) 11 | .get("twoColumnSearchResultsRenderer", {}) 12 | .get("primaryContents", {}) 13 | .get("sectionListRenderer", {}) 14 | .get("contents", []) 15 | ) 16 | else: 17 | medias = ( 18 | data.get("onResponseReceivedCommands", [{}])[0] 19 | .get("appendContinuationItemsAction", {}) 20 | .get("continuationItems", []) 21 | ) 22 | 23 | if len(medias) > 1: 24 | item_selection_renderer, continuation_tracker = medias 25 | else: 26 | item_selection_renderer, continuation_tracker = medias[0], {} 27 | 28 | estimated_results = int(data["estimatedResults"]) 29 | items = item_selection_renderer.get("itemSectionRenderer", []) 30 | continuation_token = ( 31 | continuation_tracker.get("continuationItemRenderer", {}) 32 | .get("continuationEndpoint", {}) 33 | .get("continuationCommand", {}) 34 | .get("token", None) 35 | ) 36 | return estimated_results, items, continuation_token 37 | 38 | 39 | youtube_client_version = "2.20221021.00.00" 40 | youtube_client_name = "1" 41 | 42 | 43 | def youtube_pbj_request( 44 | session, 45 | *args, 46 | params={}, 47 | headers={}, 48 | youtube_client_version=youtube_client_version, 49 | youtube_client_name=youtube_client_name, 50 | **kwargs, 51 | ): 52 | 53 | params.update({"pbj": "1"}) 54 | headers.update( 55 | { 56 | "x-youtube-client-name": youtube_client_name, 57 | "x-youtube-client-version": youtube_client_version, 58 | } 59 | ) 60 | 61 | return session.get( 62 | *args, 63 | params=params, 64 | headers=headers, 65 | **kwargs, 66 | ) 67 | 68 | 69 | def search( 70 | session, 71 | query: str, 72 | *, 73 | sort_by: "filters.SortBy | None" = filters.SortBy.RELEVANCE, 74 | features: "tuple[filters.Feature]" = (), 75 | duration: "filters.Duration" = None, 76 | content_type: "filters.ContentType" = None, 77 | upload_time: "filters.UploadTime" = None, 78 | autocorrect: bool = False, 79 | keep_searching: bool = False, 80 | youtube_client_version=youtube_client_version, 81 | youtube_client_name=youtube_client_name, 82 | ): 83 | 84 | filter_key = filters.get_filter_key( 85 | sort_by=sort_by, 86 | features=features, 87 | duration=duration, 88 | content_type=content_type, 89 | upload_time=upload_time, 90 | autocorrect=autocorrect, 91 | ) 92 | 93 | params = { 94 | "hl": "en", 95 | "search_query": query, 96 | } 97 | 98 | if filter_key: 99 | params["sp"] = filter_key 100 | 101 | youtube_response = youtube_pbj_request( 102 | session, 103 | YOUTUBE_SEARCH_URL, 104 | params=params, 105 | youtube_client_name=youtube_client_name, 106 | youtube_client_version=youtube_client_version, 107 | ) 108 | 109 | _, returned_data = youtube_response.json() 110 | 111 | ( 112 | estimated_results, 113 | item_selection_renderer, 114 | continuation_token, 115 | ) = fetch_data(returned_data["response"], initial=True) 116 | 117 | for component in iter_from_item_section_renderer(item_selection_renderer): 118 | yield component | {"estimated_results": estimated_results} 119 | 120 | if not keep_searching: 121 | return 122 | 123 | component = {} 124 | 125 | context = { 126 | "hl": "en", 127 | "client": { 128 | "clientName": youtube_client_name, 129 | "clientVersion": youtube_client_version, 130 | }, 131 | } 132 | 133 | while component is not None and continuation_token is not None: 134 | 135 | component = None 136 | 137 | response = session.post( 138 | YOUTUBE_SEARCH_API, 139 | params={ 140 | "key": PUBLIC_YOUTUBE_API_KEY, 141 | }, 142 | json={"context": context, "continuation": continuation_token}, 143 | ).json() 144 | 145 | ( 146 | estimated_results, 147 | item_selection_renderer, 148 | continuation_token, 149 | ) = fetch_data(response) 150 | 151 | for component in iter_from_item_section_renderer(item_selection_renderer): 152 | yield estimated_results | {"estimated_results": estimated_results} 153 | --------------------------------------------------------------------------------