├── .gitignore ├── README.md ├── googlelens ├── __init__.py └── googlelens.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | main.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google Lens Python 2 | 3 | Google Lens Python is a Python package that allows you to reverse image search on Google Lens using Python, with the ability to search by file path or by URL. 4 | 5 | ## Installation 6 | 7 | You can install Google Lens Python using pip: 8 | 9 | ```sh 10 | pip install git+https://github.com/krishna2206/google-lens-python.git 11 | ``` 12 | 13 | ## Usage 14 | 15 | To use Google Lens Python, import the `GoogleLens` class from the package and create an instance of it: 16 | 17 | ```python 18 | from googlelens import GoogleLens 19 | 20 | lens = GoogleLens() 21 | ``` 22 | 23 | ### Searching by file 24 | 25 | To search by a file path, use the `search_by_file` method and pass in the file path as a string: 26 | 27 | ```python 28 | search_result = lens.search_by_file("path/to/image.jpg") 29 | print(search_result) 30 | ``` 31 | 32 | This will return a dictionary containing the search results. 33 | 34 | ### Searching by URL 35 | 36 | To search by a URL, use the `search_by_url` method and pass in the URL as a string: 37 | 38 | ```python 39 | search_result = lens.search_by_url("https://example.com/image.jpg") 40 | print(search_result) 41 | ``` 42 | 43 | This will return a dictionary containing the search results. 44 | 45 | ## Ideas 46 | 47 | - Implement text detection (if possible) 48 | - Implement translate feature (if possible) 49 | 50 | ## Contributing 51 | 52 | Contributions to the Google Lens Python project are welcome! 
To contribute, please submit a pull request to the project's [GitHub repository](https://github.com/krishna2206/google-lens-python). 53 | 54 | ## License 55 | 56 | MIT License 57 | 58 | #### Notice 59 | 60 | This license applies only to the files in this repository authored by Anhy Krishna Fitiavana (the "Author"). Other files may be subject to additional or different licenses. 61 | 62 | #### License 63 | 64 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 65 | 66 | 1. The above notice and this permission notice shall be included in all copies or substantial portions of the Software. 67 | 68 | 2. The Software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. 69 | 70 | 3. In no event shall the Author be liable for any claim, damages or other liability, whether in an action of contract, tort or otherwise, arising from, out of or in connection with the Software or the use or other dealings in the Software. 
-------------------------------------------------------------------------------- /googlelens/__init__.py: -------------------------------------------------------------------------------- 1 | from .googlelens import GoogleLens -------------------------------------------------------------------------------- /googlelens/googlelens.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from requests import Session 4 | from bs4 import BeautifulSoup 5 | 6 | class GoogleLens: 7 | def __init__(self): 8 | """ 9 | Initialize the GoogleLens object. 10 | 11 | Sets up base URL and session with appropriate headers for making requests to Google Lens. 12 | """ 13 | self.url = "https://lens.google.com" 14 | self.session = Session() 15 | 16 | # Update session headers to mimic a standard browser user-agent 17 | self.session.headers.update( 18 | {'User-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:103.0) Gecko/20100101 Firefox/103.0'} 19 | ) 20 | 21 | def __get_prerender_script(self, page: str): 22 | """ 23 | Extracts the relevant prerendered JavaScript data from the HTML page. 24 | 25 | Parameters: 26 | page (str): The HTML page content as a string. 27 | 28 | Returns: 29 | dict: The extracted and parsed JSON data structure from the prerendered script. 
30 | """ 31 | # Parse the HTML content using BeautifulSoup 32 | soup = BeautifulSoup(page, 'html.parser') 33 | 34 | # Find the script containing 'AF_initDataCallback' with specific key and hash values 35 | prerender_script = list(filter( 36 | lambda s: ( 37 | 'AF_initDataCallback(' in s.text and 38 | re.search(r"key: 'ds:(\d+)'", s.text).group(1) == "0"), 39 | soup.find_all('script') 40 | ))[0].text 41 | 42 | # Clean up the script content to prepare it for JSON parsing 43 | prerender_script = prerender_script.replace( 44 | "AF_initDataCallback(", "").replace(");", "") 45 | 46 | # Extract hash value and replace the corresponding fields in the script for JSON formatting 47 | hash = re.search(r"hash: '(\d+)'", prerender_script).group(1) 48 | prerender_script = prerender_script.replace( 49 | f"key: 'ds:0', hash: '{hash}', data:", 50 | f"\"key\": \"ds:0\", \"hash\": \"{hash}\", \"data\":" 51 | ).replace("sideChannel:", "\"sideChannel\":") 52 | 53 | # Parse the cleaned prerender script into a JSON object 54 | prerender_script = json.loads(prerender_script) 55 | 56 | # Return the relevant data section for further processing 57 | return prerender_script['data'][1] 58 | 59 | def __parse_prerender_script(self, prerender_script): 60 | """ 61 | Parses the prerendered script to extract match and similar items. 62 | 63 | Parameters: 64 | prerender_script (dict): The parsed JSON data from the prerender script. 65 | 66 | Returns: 67 | dict: A dictionary containing the main match and visually similar matches. 
68 | """ 69 | # Initialize the result dictionary 70 | data = { 71 | "match": None, 72 | "similar": [] 73 | } 74 | 75 | # Extract the best match information if available 76 | try: 77 | data["match"] = { 78 | "title": prerender_script[0][1][8][12][0][0][0], # Extract item title 79 | "thumbnail": prerender_script[0][1][8][12][0][2][0][0], # Extract thumbnail URL 80 | "pageURL": prerender_script[0][1][8][12][0][2][0][4] # Extract page URL 81 | } 82 | except IndexError: 83 | # If data is unavailable, continue without a match 84 | pass 85 | 86 | # Determine which section to use for extracting visual matches 87 | if data["match"] is not None: 88 | visual_matches = prerender_script[1][1][8][8][0][12] 89 | else: 90 | try: 91 | visual_matches = prerender_script[0][1][8][8][0][12] 92 | except IndexError: 93 | return data 94 | 95 | # Iterate through the visual matches and extract relevant details 96 | for match in visual_matches: 97 | # Safely extract thumbnail URL if available 98 | thumbnail_url = match[0][0] if ( 99 | isinstance(match[0], list) and len(match[0]) > 0 and 100 | isinstance(match[0][0], str) 101 | ) else None 102 | 103 | # Safely extract price if available 104 | price = match[0][7][1] if ( 105 | isinstance(match[0], list) and len(match[0]) > 7 and 106 | isinstance(match[0][7], list) and len(match[0][7]) > 1 and 107 | isinstance(match[0][7][1], str) 108 | ) else None 109 | 110 | # Clean price by removing any special characters (e.g., currency signs) 111 | price = re.sub(r"[^\d.]", "", price) if price is not None else None 112 | 113 | # Safely extract currency if available 114 | currency = match[0][7][5] if ( 115 | isinstance(match[0], list) and len(match[0]) > 7 and 116 | isinstance(match[0][7], list) and len(match[0][7]) > 5 and 117 | isinstance(match[0][7][5], str) 118 | ) else None 119 | 120 | # Append the extracted information to the "similar" matches list 121 | data["similar"].append( 122 | { 123 | "title": match[3], # Extract item title 124 | "similarity 
score": match[1], # Extract similarity (?) score 125 | "thumbnail": thumbnail_url, # Thumbnail URL 126 | "pageURL": match[5], # Extract page URL 127 | "sourceWebsite": match[14], # Extract source website name 128 | "price": price, # Price (cleaned) 129 | "currency": currency # Currency symbol 130 | } 131 | ) 132 | 133 | # Return the results dictionary 134 | return data 135 | 136 | def search_by_file(self, file_path: str): 137 | """ 138 | Perform an image-based search by uploading a file. 139 | 140 | Parameters: 141 | file_path (str): The path to the image file that will be used for the search. 142 | 143 | Returns: 144 | The parsed search results after extracting and processing the response. 145 | """ 146 | multipart = { 147 | 'encoded_image': (file_path, open(file_path, 'rb')), 148 | 'image_content': '' 149 | } 150 | 151 | # Build the parameter dictionary 152 | params = { 153 | "hl": "en", # Adjust host language here 154 | "gl": "us", # Adjust the geolocation parameter here 155 | } 156 | 157 | # Send a POST request to upload the file 158 | response = self.session.post( 159 | self.url + "/upload", 160 | files=multipart, 161 | params=params, 162 | allow_redirects=False # Must be false to capture the 302 response 163 | ) 164 | 165 | # Check if the request was successful 166 | if response.status_code != 302: # Expecting a 302 for redirect 167 | print(f"Error uploading file: Status code {response.status_code}") 168 | print(response.text) 169 | return None # Or handle the error appropriately 170 | 171 | # Get the redirect URL from the 'Location' header 172 | search_url = response.headers.get('Location') 173 | 174 | # If redirect URL is not found, print an error and return 175 | if search_url is None: 176 | print("Redirect URL not found in response headers.") 177 | return None # Or handle the error appropriately 178 | 179 | # Proceed with the redirect 180 | response = self.session.get(search_url) 181 | 182 | # Extract the prerendered JavaScript content for further 
parsing. 183 | prerender_script = self.__get_prerender_script(response.text) 184 | 185 | # Parse the prerender script and return the processed search result. 186 | return self.__parse_prerender_script(prerender_script) 187 | 188 | def search_by_url(self, url: str): 189 | """ 190 | Perform an image-based search by providing an image URL. 191 | 192 | Parameters: 193 | url (str): The URL of the image that will be used for the search. 194 | 195 | Returns: 196 | The parsed search results after extracting and processing the response. 197 | """ 198 | # Build the parameter dictionary 199 | params = { 200 | "url": url, 201 | "hl": "en", # Adjust host language here 202 | "gl": "us", # Adjust geolocation here 203 | } 204 | # Send a GET request to the provided URL 205 | response = self.session.get( 206 | self.url + "/uploadbyurl", 207 | params=params, 208 | allow_redirects=True 209 | ) 210 | 211 | # Extract the prerendered JavaScript content for further parsing 212 | prerender_script = self.__get_prerender_script(response.text) 213 | 214 | # Parse the prerender script and return the processed search result 215 | return self.__parse_prerender_script(prerender_script) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | beautifulsoup4 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | VERSION = '2023.03.18' 4 | DESCRIPTION = 'A Python package to reverse image search in Google Lens' 5 | LONG_DESCRIPTION = 'A Python package to reverse image search in Google Lens, with the ability to search by file path or by url.' 
6 | 7 | setup( 8 | name="google-lens-python", 9 | version=VERSION, 10 | author="Anhy Krishna Fitiavana", 11 | author_email="fitiavana.krishna@gmail.com", 12 | description=DESCRIPTION, 13 | long_description=LONG_DESCRIPTION, 14 | packages=find_packages(), 15 | install_requires=['requests', 'bs4'], 16 | keywords=['python', 'google', 'scraping'], 17 | classifiers=[ 18 | "Development Status :: 5 - Production/Stable", 19 | "Intended Audience :: Developers", 20 | "Programming Language :: Python :: 3", 21 | "Operating System :: OS Independent", 22 | ] 23 | ) --------------------------------------------------------------------------------