├── requirements.txt
├── README.md
└── googleImageScraper.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | selenium
2 | requests
3 | Pillow


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Google Image Scraper
 2 | 
 3 | ## Description
 4 | This Python script allows you to scrape images from Google Images based on a specified search query and save them to your local machine. It utilizes Selenium and the Edge web browser to automate the image retrieval process.
 5 | 
 6 | **Purpose:** The primary goal of this project is to create a dataset for training machine learning and artificial intelligence models in an automated manner.
 7 | 
 8 | ## Usage
 9 | 
10 | Clone this repository to your local machine:
11 | ```bash
12 | git clone https://github.com/mahmutovichana/Google-Image-Scraper.git
13 | ```
14 | Navigate to the project folder:
15 | ```bash
16 | cd Google-Image-Scraper
17 | ```
18 | Install the required Python packages:
19 | ```bash
20 | pip install -r requirements.txt
21 | ```
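22 | Update the `query` variable in the `googleImageScraper.py` file with your desired search query. The output folder (`folder_name`) and the path to the Edge WebDriver executable are hard-coded in the same file, so adjust them to match your machine.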
23 | 
24 | Run the script:
25 | ```bash
26 | python googleImageScraper.py
27 | ```
28 | 
29 | Images matching your query will be downloaded and saved as numbered `.jpg` files in a folder named after the query on your desktop.
30 | 
31 | ## Dependencies
32 | - Python 3.x
33 | - Selenium
34 | - Requests
35 | - Pillow (PIL)
36 | - Microsoft Edge WebDriver (ensure it is compatible with your Edge browser version)


--------------------------------------------------------------------------------
/googleImageScraper.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import io
 3 | import time
 4 | import base64  
 5 | import requests
 6 | from PIL import Image
 7 | from urllib.parse import quote
 8 | from selenium import webdriver
 9 | from selenium.webdriver.common.by import By
10 | from selenium.webdriver.common.keys import Keys
11 | from selenium.webdriver.common.action_chains import ActionChains
12 | 
# Enter query for Google search
query = "plastic"

# Convert the query into URL format
query_url = quote(query)

# Directory that receives one sub-folder per query (machine-specific; adjust as needed)
DESKTOP_DIR = 'C:\\Users\\mahmu\\OneDrive\\Desktop'

# Specify the desired folder path on the desktop
folder_name = os.path.join(DESKTOP_DIR, query)

# Location of the Edge WebDriver executable (machine-specific; adjust as needed)
EDGE_DRIVER_PATH = r"C:\Users\mahmu\MicrosoftWebDriver.exe"

# URL for Google Images search
url = f"https://www.google.com/search?q={query_url}&tbm=isch"

# Number of scroll-to-bottom passes; raise it to load more thumbnails
SCROLL_ROUNDS = 10
# Seconds to wait after each scroll so newly requested thumbnails can load
SCROLL_PAUSE_SECONDS = 2
# Upper bound for a single image download so one stalled request cannot hang the run
REQUEST_TIMEOUT_SECONDS = 10


def decode_data_uri(src):
    """Return the decoded bytes of a base64 ``data:image/...`` URI.

    Returns None when *src* is empty, is not a base64-encoded image data URI,
    or carries a payload that cannot be base64-decoded.
    """
    if not src or not src.startswith("data:image/"):
        return None
    _, separator, payload = src.partition(";base64,")
    if not separator:
        return None
    try:
        return base64.b64decode(payload)
    except ValueError:  # binascii.Error (bad padding etc.) is a ValueError
        return None


def create_driver():
    """Start and return an Edge WebDriver session using EDGE_DRIVER_PATH."""
    # Imported locally so the file's top-level import block stays untouched.
    from selenium.webdriver.edge.service import Service

    # Selenium 4 takes the driver executable through a Service object; the
    # path must not be passed as the first positional argument.
    return webdriver.Edge(service=Service(EDGE_DRIVER_PATH))


def collect_image_sources():
    """Load the search page, scroll it, and return each thumbnail's ``src``.

    The browser is always closed before returning, even if a step fails.
    """
    driver = create_driver()
    try:
        # Open the URL in the web browser
        driver.get(url)

        # Simulate scrolling to load more images
        for _ in range(SCROLL_ROUNDS):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(SCROLL_PAUSE_SECONDS)

        # NOTE(review): the 'img.rg_i' selector depends on Google's current
        # markup - confirm it still matches the result thumbnails.
        elements = driver.find_elements(By.CSS_SELECTOR, 'img.rg_i')
        # Read the attributes while the session is still alive.
        return [element.get_attribute("src") for element in elements]
    finally:
        # Close the web browser
        driver.quit()


def save_image(src, destination):
    """Store the image referenced by *src* at *destination*.

    *src* is either an http(s) URL, which is downloaded, or a base64 image
    data URI, which is decoded. Returns True when a file was written and
    False when *src* is empty or of an unsupported form.

    Raises:
        requests.RequestException: the download failed or returned an
            HTTP error status.
        OSError: the file could not be written or the data is not an image.
    """
    if not src:
        return False

    if src.startswith('http'):
        response = requests.get(src, timeout=REQUEST_TIMEOUT_SECONDS)
        # Do not save an HTML error page under a .jpg name.
        response.raise_for_status()
        with open(destination, "wb") as img_file:
            img_file.write(response.content)
        return True

    raw = decode_data_uri(src)
    if raw is None:
        return False
    image = Image.open(io.BytesIO(raw))
    # The target is a .jpg file; convert palette/alpha modes that JPEG
    # cannot store.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    image.save(destination)
    return True


def main():
    """Scrape thumbnails for ``query`` and save them into ``folder_name``."""
    try:
        # Create the folder if it doesn't exist
        os.makedirs(folder_name, exist_ok=True)
    except OSError as e:
        print(f"An error occurred: {str(e)}")
        return  # nothing can be saved without the target folder

    sources = collect_image_sources()

    # Download and save images; file numbers follow the element order, so
    # skipped elements leave gaps exactly as before.
    saved = 0
    for i, src in enumerate(sources):
        img_path = os.path.join(folder_name, f"{i + 1}.jpg")
        try:
            if save_image(src, img_path):
                saved += 1
        except (requests.RequestException, OSError) as e:
            # One bad image should not abort the whole run.
            print(f"An error occurred: {str(e)}")

    print(f"Saved {saved} of {len(sources)} images")
    print(f"Images have been downloaded and saved in the folder: {folder_name}")


if __name__ == "__main__":
    main()
72 | 


--------------------------------------------------------------------------------