├── .gitignore ├── README.md ├── googlelens ├── __init__.py └── googlelens.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | main.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google Lens Python 2 | 3 | Google Lens Python is a Python package that allows you to reverse image search on Google Lens using Python, with the ability to search by file path or by URL. 4 | 5 | ## Installation 6 | 7 | You can install Google Lens Python using pip: 8 | 9 | ```sh 10 | pip install git+https://github.com/krishna2206/google-lens-python.git 11 | ``` 12 | 13 | ## Usage 14 | 15 | To use Google Lens Python, import the `GoogleLens` class from the package and create an instance of it: 16 | 17 | ```python 18 | from googlelens import GoogleLens 19 | 20 | lens = GoogleLens() 21 | ``` 22 | 23 | ### Searching by file 24 | 25 | To search by a file path, use the `search_by_file` method and pass in the file path as a string: 26 | 27 | ```python 28 | search_result = lens.search_by_file("path/to/image.jpg") 29 | print(search_result) 30 | ``` 31 | 32 | This will return a dictionary containing the search results. 33 | 34 | ### Searching by URL 35 | 36 | To search by a URL, use the `search_by_url` method and pass in the URL as a string: 37 | 38 | ```python 39 | search_result = lens.search_by_url("https://example.com/image.jpg") 40 | print(search_result) 41 | ``` 42 | 43 | This will return a dictionary containing the search results. 44 | 45 | ## Ideas 46 | 47 | - Implement text detection (if possible) 48 | - Implement translate feature (if possible) 49 | 50 | ## Contributing 51 | 52 | Contributions to the Google Lens Python project are welcome! 
To contribute, please submit a pull request to the project's [GitHub repository](https://github.com/krishna2206/google-lens-python). 53 | 54 | ## License 55 | 56 | MIT License 57 | 58 | #### Notice 59 | 60 | This license applies only to the files in this repository authored by Anhy Krishna Fitiavana (the "Author"). Other files may be subject to additional or different licenses. 61 | 62 | #### License 63 | 64 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 65 | 66 | 1. The above notice and this permission notice shall be included in all copies or substantial portions of the Software. 67 | 68 | 2. The Software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. 69 | 70 | 3. In no event shall the Author be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the Software or the use or other dealings in the Software. 
-------------------------------------------------------------------------------- /googlelens/__init__.py: -------------------------------------------------------------------------------- 1 | from .googlelens import GoogleLens -------------------------------------------------------------------------------- /googlelens/googlelens.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from requests import Session 4 | from bs4 import BeautifulSoup 5 | 6 | class GoogleLens: 7 | def __init__(self): 8 | """ 9 | Initialize the GoogleLens object. 10 | 11 | Sets up base URL and session with appropriate headers for making requests to Google Lens. 12 | """ 13 | self.url = "https://lens.google.com" 14 | self.session = Session() 15 | 16 | # Update session headers to mimic a standard browser user-agent 17 | self.session.headers.update( 18 | {'User-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0'} 19 | ) 20 | 21 | def __get_prerender_script(self, page: str): 22 | """ 23 | Extracts the relevant prerendered JavaScript data from the HTML page. 24 | 25 | Parameters: 26 | page (str): The HTML page content as a string. 27 | 28 | Returns: 29 | dict: The extracted and parsed JSON data structure from the prerendered script. 
30 | """ 31 | # Parse the HTML content using BeautifulSoup 32 | soup = BeautifulSoup(page, 'html.parser') 33 | 34 | # Find the script containing 'AF_initDataCallback' with specific key and hash values 35 | prerender_script = list(filter( 36 | lambda s: ( 37 | 'AF_initDataCallback(' in s.text and 38 | re.search(r"key: 'ds:(\d+)'", s.text).group(1) == "0"), 39 | soup.find_all('script') 40 | ))[0].text 41 | 42 | # Clean up the script content to prepare it for JSON parsing 43 | prerender_script = prerender_script.replace( 44 | "AF_initDataCallback(", "").replace(");", "") 45 | 46 | # Extract hash value and replace the corresponding fields in the script for JSON formatting 47 | hash = re.search(r"hash: '(\d+)'", prerender_script).group(1) 48 | prerender_script = prerender_script.replace( 49 | f"key: 'ds:0', hash: '{hash}', data:", 50 | f"\"key\": \"ds:0\", \"hash\": \"{hash}\", \"data\":" 51 | ).replace("sideChannel:", "\"sideChannel\":") 52 | 53 | # Parse the cleaned prerender script into a JSON object 54 | prerender_script = json.loads(prerender_script) 55 | 56 | # Return the relevant data section for further processing 57 | return prerender_script['data'][1] 58 | 59 | def __parse_prerender_script(self, prerender_script): 60 | """ 61 | Parses the prerendered script to extract match and similar items. 62 | 63 | Parameters: 64 | prerender_script (dict): The parsed JSON data from the prerender script. 65 | 66 | Returns: 67 | dict: A dictionary containing the main match and visually similar matches. 
68 | """ 69 | # Initialize the result dictionary 70 | data = { 71 | "match": None, 72 | "similar": [] 73 | } 74 | 75 | # Extract the best match information if available 76 | try: 77 | data["match"] = { 78 | "title": prerender_script[0][1][8][12][0][0][0], # Extract item title 79 | "thumbnail": prerender_script[0][1][8][12][0][2][0][0], # Extract thumbnail URL 80 | "pageURL": prerender_script[0][1][8][12][0][2][0][4] # Extract page URL 81 | } 82 | except IndexError: 83 | # If data is unavailable, continue without a match 84 | pass 85 | 86 | # Determine which section to use for extracting visual matches 87 | if data["match"] is not None: 88 | visual_matches = prerender_script[1][1][8][8][0][12] 89 | else: 90 | try: 91 | visual_matches = prerender_script[0][1][8][8][0][12] 92 | except IndexError: 93 | return data 94 | 95 | # Iterate through the visual matches and extract relevant details 96 | for match in visual_matches: 97 | # Safely extract thumbnail URL if available 98 | thumbnail_url = match[0][0] if ( 99 | isinstance(match[0], list) and len(match[0]) > 0 and 100 | isinstance(match[0][0], str) 101 | ) else None 102 | 103 | # Safely extract price if available 104 | price = match[0][7][1] if ( 105 | isinstance(match[0], list) and len(match[0]) > 7 and 106 | isinstance(match[0][7], list) and len(match[0][7]) > 1 and 107 | isinstance(match[0][7][1], str) 108 | ) else None 109 | 110 | # Clean price by removing any special characters (e.g., currency signs) 111 | price = re.sub(r"[^\d.]", "", price) if price is not None else None 112 | 113 | # Safely extract currency if available 114 | currency = match[0][7][5] if ( 115 | isinstance(match[0], list) and len(match[0]) > 7 and 116 | isinstance(match[0][7], list) and len(match[0][7]) > 5 and 117 | isinstance(match[0][7][5], str) 118 | ) else None 119 | 120 | # Append the extracted information to the "similar" matches list 121 | data["similar"].append( 122 | { 123 | "title": match[3], # Extract item title 124 | "similarity 
score": match[1], # Extract similarity (?) score 125 | "thumbnail": thumbnail_url, # Thumbnail URL 126 | "pageURL": match[5], # Extract page URL 127 | "sourceWebsite": match[14], # Extract source website name 128 | "price": price, # Price (cleaned) 129 | "currency": currency # Currency symbol 130 | } 131 | ) 132 | 133 | # Return the results dictionary 134 | return data 135 | 136 | def search_by_file(self, file_path: str): 137 | """ 138 | Perform an image-based search by uploading a file. 139 | 140 | Parameters: 141 | file_path (str): The path to the image file that will be used for the search. 142 | 143 | Returns: 144 | The parsed search results after extracting and processing the response. 145 | """ 146 | multipart = { 147 | 'encoded_image': (file_path, open(file_path, 'rb')), 148 | 'image_content': '' 149 | } 150 | 151 | # Build the parameter dictionary 152 | params = { 153 | "hl": "en", # Adjust host language here 154 | "gl": "us", # Adjust the geolocation parameter here 155 | } 156 | 157 | # Send a POST request to upload the file 158 | response = self.session.post( 159 | self.url + "/upload", 160 | files=multipart, 161 | params=params, 162 | allow_redirects=False # Must be false to capture the 302 response 163 | ) 164 | 165 | # Check if the request was successful 166 | if response.status_code != 302: # Expecting a 302 for redirect 167 | print(f"Error uploading file: Status code {response.status_code}") 168 | print(response.text) 169 | return None # Or handle the error appropriately 170 | 171 | # Get the redirect URL from the 'Location' header 172 | search_url = response.headers.get('Location') 173 | 174 | # If redirect URL is not found, print an error and return 175 | if search_url is None: 176 | print("Redirect URL not found in response headers.") 177 | return None # Or handle the error appropriately 178 | 179 | # Proceed with the redirect 180 | response = self.session.get(search_url) 181 | 182 | # Extract the prerendered JavaScript content for further 
parsing. 183 | prerender_script = self.__get_prerender_script(response.text) 184 | 185 | # Parse the prerender script and return the processed search result. 186 | return self.__parse_prerender_script(prerender_script) 187 | 188 | def search_by_url(self, url: str): 189 | """ 190 | Perform an image-based search by providing an image URL. 191 | 192 | Parameters: 193 | url (str): The URL of the image that will be used for the search. 194 | 195 | Returns: 196 | The parsed search results after extracting and processing the response. 197 | """ 198 | # Build the parameter dictionary 199 | params = { 200 | "url": url, 201 | "hl": "en", # Adjust host language here 202 | "gl": "us", # Adjust geolocation here 203 | } 204 | # Send a GET request to the provided URL 205 | response = self.session.get( 206 | self.url + "/uploadbyurl", 207 | params=params, 208 | allow_redirects=True 209 | ) 210 | 211 | # Extract the prerendered JavaScript content for further parsing 212 | prerender_script = self.__get_prerender_script(response.text) 213 | 214 | # Parse the prerender script and return the processed search result 215 | return self.__parse_prerender_script(prerender_script) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | beautifulsoup4 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | VERSION = '2023.03.18' 4 | DESCRIPTION = 'A Python package to reverse image search in Google Lens' 5 | LONG_DESCRIPTION = 'A Python package to reverse image search in Google Lens, with the ability to search by file path or by url.' 
6 | 7 | setup( 8 | name="google-lens-python", 9 | version=VERSION, 10 | author="Anhy Krishna Fitiavana", 11 | author_email="fitiavana.krishna@gmail.com", 12 | description=DESCRIPTION, 13 | long_description=LONG_DESCRIPTION, 14 | packages=find_packages(), 15 | install_requires=['requests', 'bs4'], 16 | keywords=['python', 'google', 'scraping'], 17 | classifiers=[ 18 | "Development Status :: 5 - Production/Stable", 19 | "Intended Audience :: Developers", 20 | "Programming Language :: Python :: 3", 21 | "Operating System :: OS Independent", 22 | ] 23 | ) --------------------------------------------------------------------------------