├── requirements.txt ├── README.md └── googleImageScraper.py /requirements.txt: -------------------------------------------------------------------------------- 1 | selenium 2 | requests 3 | Pillow -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Google Image Scraper 2 | 3 | ## Description 4 | This Python script allows you to scrape images from Google Images based on a specified search query and save them to your local machine. It utilizes Selenium and the Edge web browser to automate the image retrieval process. 5 | 6 | **Purpose:** The primary goal of this project is to create a dataset for training machine learning and artificial intelligence models in an automated manner. 7 | 8 | ## Usage 9 | 10 | Clone this repository to your local machine: 11 | ```bash 12 | git clone https://github.com/mahmutovichana/Google-Image-Scraper.git 13 | ``` 14 | Navigate to the project folder: 15 | ```bash 16 | cd Google-Image-Scraper 17 | ``` 18 | Install the required Python packages: 19 | ```bash 20 | pip install -r requirements.txt 21 | ``` 22 | Update the `query` variable in `googleImageScraper.py` with your desired search query, and change the hard-coded output folder and Edge WebDriver paths in the script to match your machine. 23 | 24 | Run the script: 25 | ```bash 26 | python googleImageScraper.py 27 | ``` 28 | 29 | Images matching your query will be downloaded and saved to a folder named after the query, inside the output directory set in the script (by default, a path on the author's desktop). 
"""Scrape Google Images results for a search query and save them locally.

The script opens a Google Images search in Microsoft Edge through Selenium,
scrolls the page so more thumbnails load, and writes every thumbnail it can
read (remote ``http`` URLs or inline base64 ``data:`` URIs) into a folder
named after the query.

Dependencies (from the project README and requirements.txt):
  - Python 3.x
  - Selenium
  - Requests
  - Pillow (PIL)
  - Microsoft WebDriver (ensure it's compatible with your Edge browser version)
"""
import os
import io
import time
import base64
import requests
from PIL import Image
from urllib.parse import quote
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.edge.service import Service

# Enter query for Google search
query = "plastic"

# Convert the query into URL format
query_url = quote(query)

# Specify the desired folder path on the desktop (edit for your machine)
folder_name = os.path.join('C:\\Users\\mahmu\\OneDrive\\Desktop', query)

# Location of the Edge WebDriver executable (edit for your machine)
driver_path = r"C:\Users\mahmu\MicrosoftWebDriver.exe"

# URL for Google Images search
url = f"https://www.google.com/search?q={query_url}&tbm=isch"

# Number of scroll-to-bottom rounds; raise it to load more images
SCROLL_ROUNDS = 10
# Seconds to wait after each scroll so newly loaded images can appear
SCROLL_PAUSE_SECONDS = 2
# Upper bound on a single image download so one slow host cannot hang the run
REQUEST_TIMEOUT_SECONDS = 30
# Pillow image modes that can be written as JPEG without conversion
JPEG_MODES = ("RGB", "L", "CMYK")


def save_image(img_url, img_path):
    """Write the image referenced by ``img_url`` to ``img_path``.

    Args:
        img_url: Value of an ``<img>`` element's ``src`` attribute. May be
            ``None``/empty, an ``http(s)`` URL, or a base64 ``data:image/``
            URI.
        img_path: Destination file path (the script uses ``.jpg`` names).

    Returns:
        ``True`` if a file was written, ``False`` if ``img_url`` was empty or
        in a form this script does not handle.

    Raises:
        requests.RequestException: The download failed or returned an HTTP
            error status.
        ValueError: The base64 payload could not be decoded.
        OSError: The image data could not be parsed or the file not written.
    """
    if not img_url:
        return False

    if img_url.startswith('http'):
        response = requests.get(img_url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Do not save an HTML error page under a .jpg name.
        response.raise_for_status()
        with open(img_path, "wb") as img_file:
            img_file.write(response.content)
        return True

    if img_url.startswith('data:image/') and 'base64,' in img_url:
        # Decode base64 image data and save it
        img_data = img_url.split('base64,', 1)[1]
        with Image.open(io.BytesIO(base64.b64decode(img_data))) as picture:
            # JPEG cannot store alpha/palette modes, so convert those first.
            if picture.mode not in JPEG_MODES:
                picture = picture.convert("RGB")
            picture.save(img_path, "JPEG")
        return True

    return False


def main():
    """Run the search, scroll to load thumbnails, and download them."""
    # exist_ok avoids a spurious error when the script is re-run for a query.
    os.makedirs(folder_name, exist_ok=True)

    # Initialize the Edge web browser using a service (Selenium 4 API)
    driver = webdriver.Edge(service=Service(executable_path=driver_path))
    saved = 0
    try:
        # Open the URL in the web browser
        driver.get(url)

        # Simulate scrolling to load more images
        for _ in range(SCROLL_ROUNDS):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(SCROLL_PAUSE_SECONDS)  # Wait for images to load

        # Find all image elements.
        # NOTE(review): 'img.rg_i' depends on Google's page markup; confirm it
        # still matches the result thumbnails.
        img_elements = driver.find_elements(By.CSS_SELECTOR, 'img.rg_i')
        print(f"Found {len(img_elements)} image elements")

        # Download and save images; file numbers follow the element index, so
        # skipped elements leave gaps in the numbering.
        for i, element in enumerate(img_elements):
            img_path = os.path.join(folder_name, f"{i + 1}.jpg")
            try:
                if save_image(element.get_attribute("src"), img_path):
                    saved += 1
            except (requests.RequestException, ValueError, OSError) as e:
                # Best effort: one bad image must not abort the whole run.
                print(f"An error occurred: {str(e)}")
    finally:
        # Close the web browser even if scraping failed part-way.
        driver.quit()

    print(f"Saved {saved} images")
    print(f"Images have been downloaded and saved in the folder: {folder_name}")


if __name__ == "__main__":
    main()