├── LICENSE ├── README.md ├── examples ├── google_lens.png └── google_lens0.png ├── pyproject.toml ├── src └── google_img_source_search │ ├── __init__.py │ ├── batchexecute_decoder.py │ ├── exceptions.py │ ├── f_req_template.py │ ├── google_items │ ├── __init__.py │ ├── image.py │ └── search_item.py │ ├── image_file_uploader.py │ ├── image_source_searcher.py │ ├── image_uploader.py │ ├── reverse_image_searcher.py │ └── safe_modes.py └── tests ├── test.png └── tests_reverse_image_searcher.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Vorrik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google Reverse Image Search 2 | [![PyPI version](https://badge.fury.io/py/google-image-source-search.svg?)](https://badge.fury.io/py/google-image-source-search) 3 | 4 | An unofficial, simple solution for searching web pages by a specified image. Written in pure `requests`. 5 | 6 | ![Google lens](https://raw.githubusercontent.com/Vorrik/Google-Image-Source-Search/master/examples/google_lens0.png) 7 | 8 | ## Installation 9 | ```sh 10 | > pip install google-image-source-search 11 | ``` 12 | 13 | ## Usage 14 | ```py 15 | from google_img_source_search import ReverseImageSearcher 16 | 17 | 18 | if __name__ == '__main__': 19 | image_url = 'https://i.pinimg.com/originals/c4/50/35/c450352ac6ea8645ead206721673e8fb.png' 20 | 21 | rev_img_searcher = ReverseImageSearcher() 22 | res = rev_img_searcher.search(image_url) 23 | 24 | for search_item in res: 25 | print(f'Title: {search_item.page_title}') 26 | print(f'Site: {search_item.page_url}') 27 | print(f'Img: {search_item.image_url}\n') 28 | ``` 29 | #### Searching by file 30 | ```py 31 | rev_img_searcher.search_by_file('test.png') 32 | ``` 33 | #### Switching safe mode 34 | ```py 35 | from google_img_source_search import SafeMode 36 | rev_img_searcher.switch_safe_mode(SafeMode.DISABLED) 37 | ``` 38 | #### Passing custom session 39 | ```py 40 | import requests 41 | session = requests.Session() 42 | rev_img_searcher = ReverseImageSearcher(session) 43 | ``` 44 | 45 | ### Results 46 | #### Output: 47 | ``` 48 | Title: WAIFU OR LAIFU? - YouTube 49 | Site: https://www.youtube.com/watch?v=F8l5OgLpuyM 50 | Img: https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSKg0oMfWeMyBo0-KRerecMaRXNLI2zTLqmyXc0TgDS7nWJx3aB 51 | 52 | Title: Печальный факт о Эмилии из аниме "Re:Zero. 
Жизнь с нуля в альтернативном мире" | AniGAM | Дзен 53 | Site: https://dzen.ru/a/ZBs3vFvEYSvIxPhv 54 | Img: https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTA-RDMY-xUrV5Qqn5fYjJ9qFsZC2Posk16qHkWh4sdnVP5Leh7 55 | 56 | Title: Stream chocho music | Listen to songs, albums, playlists for free on SoundCloud 57 | Site: https://soundcloud.com/chocho-329200480 58 | Img: https://encrypted-tbn1.gstatic.com/images?q=tbn:ANd9GcQycLi6Ug9JtqKp5t6irb-3Pbj26DtTT48P-R38lOqVI5pXCwYz 59 | ``` 60 | #### Google lens: 61 | ![Google lens](https://raw.githubusercontent.com/Vorrik/Google-Image-Source-Search/master/examples/google_lens.png) 62 | -------------------------------------------------------------------------------- /examples/google_lens.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vorrik/Google-Reverse-Image-Search/9ccec75a0328114821fb26ad3539f1923536a426/examples/google_lens.png -------------------------------------------------------------------------------- /examples/google_lens0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vorrik/Google-Reverse-Image-Search/9ccec75a0328114821fb26ad3539f1923536a426/examples/google_lens0.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "google-image-source-search" 3 | version = "1.2.2" 4 | authors = [ 5 | { name="Vorrik", email="author@example.com" }, 6 | ] 7 | description = "A package to search image sources using google services" 8 | readme = "README.md" 9 | requires-python = ">=3.7" 10 | 11 | classifiers = [ 12 | "Programming Language :: Python :: 3", 13 | "License :: OSI Approved :: MIT License", 14 | "Operating System :: OS Independent", 15 | ] 16 | dependencies = [ 17 | "requests <= 2.31.0", 18 | ] 19 | 20 | [project.urls] 21 | 
# https://github.com/pndurette/pybatchexecute/blob/main/pybatchexecute/decode.py

import json
import re
from typing import List, Optional, Tuple

__all__ = ["decode"]


class BatchExecuteDecodeException(Exception):
    """Raised when a ``batchexecute`` response cannot be decoded."""


# Extracts the raw envelopes of an ``rt=c`` (compressed) response.
# The named group ``envelope`` is what ``_decode_rt_compressed`` reads back.
_ENVELOPE_PATTERN = re.compile(
    pattern=r"""
        (\d+\n)             # <number><\n>
        (?P<envelope>.+?)   # 'envelope': anything incl. <\n> (re.DOTALL)
        (?=\d+\n|$)         # until the next <number><\n> or the end of input
    """,
    flags=re.DOTALL | re.VERBOSE,
)


def _decode_envelope(
    envelope: list, strict: bool
) -> Optional[Tuple[int, str, list]]:
    r"""Decode one envelope array shared by both response formats.

    An envelope is a JSON array of the form (e.g.)::

        ["wrb.fr","jQ1olc","[\"abc\"]\n",null,null,null,"generic"]
         ^^^^^^^^  ^^^^^^   ^^^^^^^^^^^^                 ^^^^^^^^^
         [0]       [1]      [2]                          [6]
         constant  rpc id   rpc response                 envelope index or
         (str)     (str)    (json str)                   "generic" if single envelope
                                                         (str)

    Args:
        envelope (list): The already JSON-decoded envelope array
        strict (bool): Whether empty response data is an error

    Returns:
        tuple: ``(index, rpcid, data)``, or ``None`` when ``envelope[0]`` is
        not ``"wrb.fr"`` (not an rpc response but analytics etc.)

    Raises:
        BatchExecuteDecodeException: If the response data is not a valid JSON string
        BatchExecuteDecodeException: If the response data is empty (if ``strict`` is ``True``)

    """
    if envelope[0] != "wrb.fr":
        return None

    # index is 1-based; "generic" means the response holds a single envelope
    index = 1 if envelope[6] == "generic" else int(envelope[6])
    rpcid = envelope[1]

    try:
        data = json.loads(envelope[2])
    except (TypeError, json.JSONDecodeError) as e:
        # TypeError covers a ``null`` payload, which json.loads rejects
        # before it even tries to parse.
        raise BatchExecuteDecodeException(
            f"Envelope {index} ({rpcid}): data is not a valid JSON string. "
            + "JSON decode error was: "
            + str(e)
        ) from e

    if strict and data == []:
        raise BatchExecuteDecodeException(
            f"Envelope {index} ({rpcid}): data is empty (strict)."
        )

    return index, rpcid, data


def _decode_rt_compressed(
    raw: str, strict: bool = False
) -> List[Tuple[int, str, list]]:
    r"""Decode a raw response from a ``batchexecute`` RPC
    made with an ``rt`` (response type) of ``c`` (compressed)

    Raw responses are scanned for repeated pairs of the form::

        <number>\n
        <envelope>

    where ``<number>`` is a run of digits (presumably the envelope length;
    its value is not used here) and ``<envelope>`` is a JSON array wrapped
    in an array, of the form (e.g.)::

        [["wrb.fr","jQ1olc","[\"abc\"]\n",null,null,null,"generic"]]

    Args:
        raw (str): The raw response from a ``batchexecute`` RPC
        strict (bool): Whether empty response data is an error

    Returns:
        list: A list of tuples, each tuple containing:
            index (int): The index of the response
            rpcid (str): The ``rpcid`` of the response
            data (list): The decoded JSON data of the response

    Raises:
        BatchExecuteDecodeException: If any response data is not a valid JSON string
        BatchExecuteDecodeException: If any response data is empty (if ``strict`` is ``True``)

    """
    decoded = []

    for item in _ENVELOPE_PATTERN.finditer(raw):
        # Each 'envelope' group is a JSON string holding one wrapped envelope.
        wrapped = json.loads(item.group("envelope"))
        entry = _decode_envelope(wrapped[0], strict)
        if entry is not None:
            decoded.append(entry)

    return decoded


def _decode_rt_default(raw: str, strict: bool = False) -> List[Tuple[int, str, list]]:
    r"""Decode a raw response from a ``batchexecute`` RPC
    made with no ``rt`` (response type) value

    Raw response is a JSON array (minus the first two lines) of the form::

        )]}'

        [<envelope>,<...>,<envelope>]

    Args:
        raw (str): The raw response from a ``batchexecute`` RPC
        strict (bool): Whether empty response data is an error

    Returns:
        list: A list of tuples, each tuple containing:
            index (int): The index of the response
            rpcid (str): The ``rpcid`` of the response
            data (list): The decoded JSON data of the response

    Raises:
        BatchExecuteDecodeException: If any response data is not a valid JSON string
        BatchExecuteDecodeException: If any response data is empty (if ``strict`` is ``True``)

    """
    # Trim the first 2 lines: ")]}'" and an empty line
    envelopes_raw = "".join(raw.split("\n")[2:])

    decoded = []

    for envelope in json.loads(envelopes_raw):
        entry = _decode_envelope(envelope, strict)
        if entry is not None:
            decoded.append(entry)

    return decoded


def decode(
    raw: str,
    rt: Optional[str] = None,
    strict: bool = False,
    expected_rpcids: Optional[list] = None,
):
    """Decode a raw response from a ``batchexecute`` RPC

    Args:
        raw (str): The raw response text from a ``batchexecute`` RPC
        rt (str): The ``rt`` parameter used in the ``batchexecute`` RPC (default: ``None``)
        strict (bool): Whether to raise an exception if the response is empty
            or the input ``rpcid``s are different from the output ``rpcid``s (default: ``False``)
        expected_rpcids (list): A list of expected ``rpcid`` values,
            ignored if ``strict`` is ``False`` (default: ``None``, treated as empty)

    Returns:
        list: A list of tuples sorted by ``index``, each tuple containing:
            * ``index`` (int): The index of the response
            * ``rpcid`` (str): The ``rpcid`` of the response
            * ``data`` (list): The JSON data returned by the ``rpcid`` function

    Raises:
        ValueError: If ``rt`` is ``"b"`` (not implemented) or any value
            other than ``"c"`` or ``None``
        BatchExecuteDecodeException: If nothing could be decoded
        BatchExecuteDecodeException: If the count of input and output ``rpcid``s is different
            (if ``strict`` is ``True``)
        BatchExecuteDecodeException: If the input and out ``rpcid``s are different
            (if ``strict`` is ``True``)

    """
    if rt == "c":
        decoded = _decode_rt_compressed(raw, strict=strict)
    elif rt == "b":
        raise ValueError("Decoding 'rt' as 'b' (ProtoBuf) is not implemented")
    elif rt is None:
        decoded = _decode_rt_default(raw, strict=strict)
    else:
        raise ValueError("Invalid 'rt' value")

    # Nothing was decoded
    if not decoded:
        raise BatchExecuteDecodeException(
            "Could not decode any envelope. Check format of 'raw'."
        )

    # Sort responses by index ([0])
    decoded = sorted(decoded, key=lambda envelope: envelope[0])

    if strict:
        in_rpcids = [] if expected_rpcids is None else expected_rpcids
        out_rpcids = [rpcid for _, rpcid, _ in decoded]

        in_len = len(in_rpcids)
        out_len = len(out_rpcids)

        if in_len != out_len:
            raise BatchExecuteDecodeException(
                "Strict: mismatch in/out rcpids count, "
                + f"expected: {in_len}, got: {out_len}."
            )

        in_set = sorted(set(in_rpcids))
        out_set = sorted(set(out_rpcids))

        if in_set != out_set:
            raise BatchExecuteDecodeException(
                "Strict: mismatch in/out rcpids, "
                + f"expected: {in_set}, got: {out_set}."
            )

    return decoded
r'null,null,null,null,null,null,null,null,null,1,null,null,null,1,null,null,null,1,null,null,null,null,' \ 14 | r'null,null,1,1,1,null,1,1,null,null,null,null,0,0,null,null,null,[5,6,2],null,null,null,1,null,1,null,1,' \ 15 | r'null,null,null,0,null,null,null,null,1,0,0,0,null,null,null,1,null,null,null,null,null,0,0,1,null,null,' \ 16 | r'1,1,null,null,null,null,null,null,1,null,0,null,0]],[[[7]]],null,null,null,26,null,null,null,null,[null,' \ 17 | r'6],[null,16],null,[16],[]],null,null,null,null,null,null,[],null,null,null,null,null,null,[],' \ 18 | r'\"EkcKJDU3NDA3NTNjLWVmNDgtNDViZi04NzExLTQ2ODFlZDkwZGE2MRIfWTlTMkhEQkhiLVFmNEhFeFhiLWI5ZUQ3UEhFNm1SZw' \ 19 | r'==\",null,null,null,null,null,[[null,[]],[null,null,null,null,null,null,' \ 20 | r'[\"\"],' \ 21 | r'[\"\",[null,null,0,0]]]],null,' \ 22 | r'\"\"]",null,"generic"]]]' 23 | -------------------------------------------------------------------------------- /src/google_img_source_search/google_items/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vorrik/Google-Reverse-Image-Search/9ccec75a0328114821fb26ad3539f1923536a426/src/google_img_source_search/google_items/__init__.py -------------------------------------------------------------------------------- /src/google_img_source_search/google_items/image.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class Image: 6 | id_1: str 7 | id_2: str 8 | -------------------------------------------------------------------------------- /src/google_img_source_search/google_items/search_item.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class SearchItem: 6 | page_url: str 7 | page_title: str 8 | image_url: str 9 | -------------------------------------------------------------------------------- 
import os
import mimetypes

from .image_uploader import ImageUploader
from .exceptions import InvalidOrUnsupportedImageFile
from .google_items.image import Image


class ImageFileUploader(ImageUploader):
    """Uploads a local image file to Google Lens."""

    def upload(self, image_path: str) -> Image:
        """Post the file at ``image_path`` to the Lens upload endpoint.

        :param image_path: path of the image file to upload
        :return: the Image extracted from the upload response
        :raises InvalidOrUnsupportedImageFile: if the response text does not
            contain the ``{'ds:0'`` marker
        """
        # The context manager closes the file even when the request raises;
        # a bare open()/close() pair would leak the handle in that case.
        with open(image_path, 'rb') as image_file:
            multipart = {
                'encoded_image': (
                    os.path.basename(image_path),
                    image_file,
                    mimetypes.guess_type(image_path)[0],
                )
            }
            upload_response = self.session.post('https://lens.google.com/v3/upload', files=multipart)

        if "{'ds:0'" not in upload_response.text:
            raise InvalidOrUnsupportedImageFile()

        return self.extract_image(upload_response.text)
import re
import json
from requests import Session

from .exceptions import InvalidImageURL
from .google_items.image import Image


class ImageUploader:
    """Uploads an image to Google Lens by URL and parses the upload response."""

    def __init__(self, session: Session):
        self.session = session

    @staticmethod
    def extract_image(upload_response: str) -> Image:
        """ Builds an Image from the ids embedded in the upload response """

        # The ids sit in the JS object literal assigned to AF_dataServiceRequests.
        literal_match = re.search(r'(?<=AF_dataServiceRequests = ).*?(?=; var)', upload_response)
        js_literal = literal_match.group(0)

        # Make the JS literal JSON-parsable: single quotes become double
        # quotes, then bare keys get quoted.
        double_quoted = js_literal.replace("'", '"')
        json_text = re.sub(r'([{\s,])(\w+)(:)', r'\1"\2"\3', double_quoted)
        request = json.loads(json_text)['ds:0']['request']

        return Image(request[0][0], request[1][7][0])

    def upload(self, image_url: str) -> Image:
        """ Submits the image URL to Lens and returns the resulting Image.
        Raises InvalidImageURL when the response lacks the ``{'ds:0'`` marker """

        response_text = self.session.get('https://lens.google.com/uploadbyurl', params={'url': image_url}).text

        if "{'ds:0'" in response_text:
            return self.extract_image(response_text)

        raise InvalidImageURL()
import html
import re
from typing import List

from requests import Session

from .google_items.search_item import SearchItem
from .safe_modes import SafeMode
from .exceptions import SafeModeSwitchError

from .image_uploader import ImageUploader
from .image_file_uploader import ImageFileUploader
from .image_source_searcher import ImageSourceSearcher


class ReverseImageSearcher:
    """Entry point: uploads an image (by URL or file) and searches for its sources."""

    def __init__(self, session=None, image_uploader=None, image_file_uploader=None, image_source_searcher=None):
        """
        :param session: requests session to use; a new one is created when omitted
        :param image_uploader: uploader used by ``search`` (default: ImageUploader)
        :param image_file_uploader: uploader used by ``search_by_file`` (default: ImageFileUploader)
        :param image_source_searcher: searcher run on the uploaded image (default: ImageSourceSearcher)
        """
        self.session = session or Session()
        self.image_uploader = image_uploader or ImageUploader(self.session)
        self.image_file_uploader = image_file_uploader or ImageFileUploader(self.session)
        self.image_source_searcher = image_source_searcher or ImageSourceSearcher(self.session)

        self.session.headers.update(
            {'User-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0'}
        )

        # Bypassing EU consent request
        self.session.cookies.update({
            'CONSENT': 'PENDING+987',
            'SOCS': 'CAESHAgBEhJnd3NfMjAyMzA4MTAtMF9SQzIaAmRlIAEaBgiAo_CmBg'
        })

        # Append the status check instead of replacing ``session.hooks``,
        # so hooks already registered on a caller-supplied session survive.
        response_hooks = self.session.hooks.get('response', [])
        if callable(response_hooks):
            response_hooks = [response_hooks]
        if self._raise_for_status not in response_hooks:
            self.session.hooks['response'] = [*response_hooks, self._raise_for_status]

    @staticmethod
    def _raise_for_status(response, *args, **kwargs):
        """Response hook: raise for HTTP error statuses."""
        response.raise_for_status()

    def switch_safe_mode(self, safe_mode: SafeMode):
        """
        Switches the session's safe search mode
        :param safe_mode: the SafeMode to switch to
        :raises SafeModeSwitchError: if the switch link is not found on the
            safesearch page or the switch request does not return status 204
        """
        safe_search_response = self.session.get('https://google.com/safesearch')

        switch_attr = {SafeMode.DISABLED: 'data-setprefs-off-url',
                       SafeMode.BLUR: 'data-setprefs-blur-url',
                       SafeMode.FILTER: 'data-setprefs-filter-url'}[safe_mode]

        switch_match = re.search(rf'(?<={switch_attr}=").*?(?=")', safe_search_response.text)
        if switch_match is None:
            raise SafeModeSwitchError(f'{switch_attr} not found on the safesearch page')

        # The attribute value is HTML-escaped (e.g. "&amp;"); decode it properly
        # rather than deleting every "amp;" substring.
        switch_params = html.unescape(switch_match.group(0))
        switch_response = self.session.get(f'https://google.com{switch_params}')

        if switch_response.status_code != 204:
            raise SafeModeSwitchError()

    def __search(self, image_uploader: ImageUploader, image: str) -> List[SearchItem]:
        """ Uploads the image with the given uploader, then searches for its sources """
        google_image = image_uploader.upload(image)
        return self.image_source_searcher.search(google_image)

    def search(self, image_url: str) -> List[SearchItem]:
        """
        Searches for web pages using the specified image. By image url
        :return: list of search items (page url, title, image url)
        """

        return self.__search(self.image_uploader, image_url)

    def search_by_file(self, image_path: str) -> List[SearchItem]:
        """
        Searches for web pages using the specified image. By image file path
        :return: list of search items (page url, title, image url)
        """

        return self.__search(self.image_file_uploader, image_path)
self.reverse_image_searcher.switch_safe_mode(SafeMode.DISABLED) 21 | self.reverse_image_searcher.switch_safe_mode(SafeMode.BLUR) 22 | self.reverse_image_searcher.switch_safe_mode(SafeMode.FILTER) 23 | --------------------------------------------------------------------------------