├── .gitattributes
├── .gitignore
├── BingImageScrapper.py
├── GettyImagesScrapper.py
├── GoogleImageScrapper.py
├── Misc_tools.ipynb
├── README.md
├── ShutterstockImagesScrapper.py
├── environment.yml
├── main.py
├── patch.py
├── requirements.txt
├── webdriver
    ├── chromedriver
    └── chromedriver.exe
└── youtube_thumbnail.PNG


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | __pycache__/GoogleImageScrapper.cpython-37.pyc
 3 | ghostdriver.log
 4 | webdriver/phantomjs.exe
 5 | webdriver/chromedriver_win32.zip
 6 | __pycache__/GoogleImageScrapper.cpython-38.pyc
 7 | __pycache__/*
 8 | webdriver/*.zip
 9 | photos
10 | .ipynb_checkpoints/juypter_main-checkpoint.ipynb
11 | 


--------------------------------------------------------------------------------
/BingImageScrapper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat Jul 18 13:01:02 2020
  4 | 
  5 | @author: OHyic
  6 | """
  7 | #import selenium drivers
  8 | from selenium import webdriver
  9 | from selenium.webdriver.chrome.options import Options
 10 | from selenium.webdriver.common.by import By
 11 | from selenium.webdriver.support.ui import WebDriverWait
 12 | from selenium.webdriver.support import expected_conditions as EC
 13 | from selenium.common.exceptions import NoSuchElementException       
 14 | 
 15 | #import helper libraries
 16 | import time
 17 | import urllib.request
 18 | import os
 19 | import requests
 20 | import io
 21 | from PIL import Image
 22 | 
 23 | #custom patch libraries
 24 | import patch 
 25 | 
 26 | class BingImageScraper():
 27 |     def __init__(self,webdriver_path,image_path, search_key="cat",number_of_images=1,headless=False,min_resolution=(0,0),max_resolution=(1920,1080)):
 28 |         #check parameter types
 29 |         image_path = os.path.join(image_path, search_key)
 30 |         if (type(number_of_images)!=int):
 31 |             print("[Error] Number of images must be integer value.")
 32 |             return
 33 |         if not os.path.exists(image_path):
 34 |             print("[INFO] Image path not found. Creating a new folder.")
 35 |             os.makedirs(image_path)
 36 |         #check if chromedriver is updated
 37 |         while(True):
 38 |             try:
 39 |                 #try going to www.Bing.com
 40 |                 options = Options()
 41 |                 if(headless):
 42 |                     options.add_argument('--headless')
 43 |                 driver = webdriver.Chrome(webdriver_path, chrome_options=options)
 44 |                 driver.set_window_size(1400,1050)
 45 |                 driver.get("https://www.Bing.com")
 46 |                 break
 47 |             except:
 48 |                 #patch chromedriver if not available or outdated
 49 |                 try:
 50 |                     driver
 51 |                 except NameError:
 52 |                     is_patched = patch.download_lastest_chromedriver()
 53 |                 else:
 54 |                     is_patched = patch.download_lastest_chromedriver(driver.capabilities['version'])
 55 |                 if (not is_patched): 
 56 |                     exit("[ERR] Please update the chromedriver.exe in the webdriver folder according to your chrome version:https://chromedriver.chromium.org/downloads")
 57 |                     
 58 |         self.driver = driver
 59 |         self.search_key = search_key
 60 |         self.number_of_images = number_of_images
 61 |         self.webdriver_path = webdriver_path
 62 |         self.image_path = image_path
 63 |         # self.url = "https://www.Bing.com/search?q=%s&source=lnms&tbm=isch&sa=X&ved=2ahUKEwie44_AnqLpAhUhBWMBHUFGD90Q_AUoAXoECBUQAw&biw=1920&bih=947"%(search_key)
 64 |         self.url = "https://www.bing.com/images/search?view=detailV2&insightstoken=bcid_S8T7-Bc3-0IE7g*ccid_xPv4Fzf7&form=SBIVSP&iss=SBIUPLOADGET&sbisrc=ImgDropper&idpbck=1&sbifsz=4000+x+3000+%c2%b7+35.99+kB+%c2%b7+png&sbifnm=MicrosoftTeams-image.png&thw=4000&thh=3000&ptime=998&dlen=49132&expw=692&exph=519&selectedindex=0&id=-1431471340&ccid=xPv4Fzf7&vt=2&sim=1"
 65 |         self.headless=headless
 66 |         self.min_resolution = min_resolution
 67 |         self.max_resolution = max_resolution
 68 |         
 69 |     def find_image_urls(self):
 70 |         """
 71 |             This function search and return a list of image urls based on the search key.
 72 |             Example:
 73 |                 Bing_image_scraper = BingImageScraper("webdriver_path","image_path","search_key",number_of_photos)
 74 |                 image_urls = Bing_image_scraper.find_image_urls()
 75 |                 
 76 |         """
 77 |         print("[INFO] Scraping for image link... Please wait.")
 78 |         image_urls=[]
 79 |         count = 0
 80 |         missed_count = 0
 81 |         self.driver.get(self.url)
 82 |         time.sleep(3)
 83 |         indx = 1
 84 |         while self.number_of_images > count:
 85 |             try:
 86 |                 #find and click image
 87 |                 print("[INFO] Getting img src link")
 88 |                 imgurl = self.driver.find_element_by_xpath('//*[@class="tab-content vsi"]/ul[1]/li[1]/div[1]/div[1]/div[1]/div[1]/div[1]/ul[1]/li[%s]/div[1]/div[1]/div[1]/div[1]/a[1]/img'%(str(indx)))
 89 |                 src_link = imgurl.get_attribute('src')
 90 |                 missed_count = 0 
 91 |             except Exception:
 92 |                 print("[-] Unable to get src_link for this photo.")
 93 |                 missed_count = missed_count + 1
 94 |                 if (missed_count>10):
 95 |                     print("[INFO] No more photos.")
 96 |                     break
 97 |                  
 98 |             try:
 99 |                 time.sleep(1)
100 |                 if(("http" in  src_link) and (not "encrypted" in src_link)):
101 |                     print("[INFO] %d. %s"%(count,src_link))
102 |                     image_urls.append(src_link)
103 |                     count +=1
104 |             except Exception:
105 |                 print("[INFO] Unable to go into src link")   
106 |                 
107 |             try:
108 |                 #BUG: Does not really scroll down the page for more images
109 |                 #TODO: fix bug to scroll and load more images for scraping
110 |                 #scroll page to load next image
111 |                 if(count%4==0):
112 |                     self.driver.execute_script("window.scrollTo(0, "+str(indx*60)+");")
113 |                 print("[INFO] Loading more photos")
114 |                 time.sleep(5)
115 |             except Exception:  
116 |                 time.sleep(1)
117 |             indx += 1
118 | 
119 |         
120 |         self.driver.quit()
121 |         print("[INFO] Bing search ended")
122 |         return image_urls
123 | 
124 |     def save_images(self,image_urls):
125 |         #save images into file directory
126 |         """
127 |             This function takes in an array of image urls and save it into the prescribed image path/directory.
128 |             Example:
129 |                 Bing_image_scraper = BingImageScraper("webdriver_path","image_path","search_key",number_of_photos)
130 |                 image_urls=["https://example_1.jpg","https://example_2.jpg"]
131 |                 Bing_image_scraper.save_images(image_urls)
132 |                 
133 |         """
134 |         print("[INFO] Saving Image... Please wait...")
135 |         for indx,image_url in enumerate(image_urls):
136 |             try:
137 |                 print("[INFO] Image url:%s"%(image_url))
138 |                 search_string = ''.join(e for e in self.search_key if e.isalnum())
139 |                 image = requests.get(image_url,timeout=5)
140 |                 if image.status_code == 200:
141 |                     with Image.open(io.BytesIO(image.content)) as image_from_web:
142 |                         try:
143 |                             # filename = "%s%s.%s"%(search_string,str(indx),image_from_web.format.lower())
144 |                             filename = "%s%s.%s"%(search_string,str(indx),'png')
145 |                             image_path = os.path.join(self.image_path, filename)
146 |                             print("[INFO] %d .Image saved at: %s"%(indx,image_path))
147 |                             image_from_web.save(image_path)
148 |                         except OSError:
149 |                             rgb_im = image_from_web.convert('RGB')
150 |                             rgb_im.save(image_path)
151 |                         image_resolution = image_from_web.size
152 |                         if image_resolution != None:
153 |                             if image_resolution[0]<self.min_resolution[0] or image_resolution[1]<self.min_resolution[1] or image_resolution[0]>self.max_resolution[0] or image_resolution[1]>self.max_resolution[1]:
154 |                                 image_from_web.close()
155 |                                 #print("BingImageScraper Notification: %s did not meet resolution requirements."%(image_url))
156 |                                 os.remove(image_path)
157 | 
158 |                         image_from_web.close()
159 |             except Exception as e:
160 |                 print("[ERROR] Failed to be downloaded",e)
161 |                 pass
162 |         print("[INFO] Download Completed. Please note that some photos are not downloaded as it is not in the right format (e.g. jpg, jpeg, png)")


--------------------------------------------------------------------------------
/GettyImagesScrapper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Tues May 31 11:48:02 2022
  4 | 
  5 | @author: JJLimmm
  6 | """
  7 | #import selenium drivers
  8 | from selenium import webdriver
  9 | from selenium.webdriver.chrome.options import Options
 10 | from selenium.webdriver.common.by import By
 11 | from selenium.webdriver.support.ui import WebDriverWait
 12 | from selenium.webdriver.support import expected_conditions as EC
 13 | from selenium.common.exceptions import NoSuchElementException       
 14 | 
 15 | #import helper libraries
 16 | import time
 17 | import urllib.request
 18 | import os
 19 | import requests
 20 | import io
 21 | from PIL import Image
 22 | 
 23 | #custom patch libraries
 24 | import patch 
 25 | 
 26 | class GettyImageScraper():
 27 |     def __init__(self,webdriver_path,image_path, search_key="cat",number_of_images=1,headless=False,min_resolution=(0,0),max_resolution=(1920,1080)):
 28 |         #check parameter types
 29 |         image_path = os.path.join(image_path, search_key)
 30 |         if (type(number_of_images)!=int):
 31 |             print("[Error] Number of images must be integer value.")
 32 |             return
 33 |         if not os.path.exists(image_path):
 34 |             print("[INFO] Image path not found. Creating a new folder.")
 35 |             os.makedirs(image_path)
 36 |         #check if chromedriver is updated
 37 |         while(True):
 38 |             try:
 39 |                 #try going to www.google.com
 40 |                 options = Options()
 41 |                 if(headless):
 42 |                     options.add_argument('--headless')
 43 |                 driver = webdriver.Chrome(webdriver_path, chrome_options=options)
 44 |                 driver.set_window_size(1400,1050)
 45 |                 driver.get("https://www.google.com")
 46 |                 break
 47 |             except:
 48 |                 #patch chromedriver if not available or outdated
 49 |                 try:
 50 |                     driver
 51 |                 except NameError:
 52 |                     is_patched = patch.download_lastest_chromedriver()
 53 |                 else:
 54 |                     is_patched = patch.download_lastest_chromedriver(driver.capabilities['version'])
 55 |                 if (not is_patched): 
 56 |                     exit("[ERR] Please update the chromedriver.exe in the webdriver folder according to your chrome version:https://chromedriver.chromium.org/downloads")
 57 |                     
 58 |         self.driver = driver
 59 |         self.search_key = search_key
 60 |         self.number_of_images = number_of_images
 61 |         self.webdriver_path = webdriver_path
 62 |         self.image_path = image_path
 63 |         self.url = "https://www.gettyimages.com/search/2/image?family=creative&phrase=%s&page1"%(search_key)
 64 |         self.headless=headless
 65 |         self.min_resolution = min_resolution
 66 |         self.max_resolution = max_resolution
 67 |         
 68 |     def find_image_urls(self):
 69 |         """
 70 |             This function search and return a list of image urls based on the search key.
 71 |             Example:
 72 |                 google_image_scraper = GoogleImageScraper("webdriver_path","image_path","search_key",number_of_photos)
 73 |                 image_urls = google_image_scraper.find_image_urls()
 74 |                 
 75 |         """
 76 |         print("[INFO] Scraping for image link... Please wait.")
 77 |         image_urls=[]
 78 |         count = 0
 79 |         missed_count = 0
 80 |         self.driver.get(self.url)
 81 |         time.sleep(3)
 82 |         indx = 1
 83 |         pagenum = 1
 84 |         while self.number_of_images > count:
 85 |             try:
 86 |                 #find and load image src
 87 |                 imgurl = self.driver.find_element_by_xpath("//*[@class='GalleryItems-module__searchContent___DbMmK']/div[%s]/article[1]/a[1]/figure[1]/picture[1]/img"%(str(indx)))
 88 |                 src_link = imgurl.get_attribute('src')
 89 |                 missed_count = 0 
 90 |             except Exception:
 91 |                 #print("[-] Unable to get photo src.")
 92 |                 missed_count = missed_count + 1
 93 |                 if (missed_count>10):
 94 |                     print("[INFO] No more photos.")
 95 |                     break
 96 |                  
 97 |             try:
 98 |                 #Go to image src
 99 |                 time.sleep(1)
100 |                 if(("http" in  src_link) and (not "encrypted" in src_link)):
101 |                     print("[INFO] %d. %s"%(count,src_link))
102 |                     image_urls.append(src_link)
103 |                     count +=1
104 |             except Exception:
105 |                 print("[INFO] Unable to get to src link")   
106 |                 
107 |             try:
108 |                 #Load next page once reaches 60 images (images per page on Getty)
109 |                 if(count%60==0):
110 |                     # element = self.driver.find_element_by_class_name("PaginationRow-module__buttonText___XM2mA")
111 |                     # element.click()
112 |                     pagenum += 1
113 |                     old_url = self.url
114 |                     new_url = old_url.replace("page=" + str(pagenum - 1), "page=" + str(pagenum))
115 |                     self.driver.get(new_url)
116 |                     indx = 0
117 |                     print("[INFO] Loading more photos")
118 |                     time.sleep(5)
119 |                     
120 |             except Exception:  
121 |                 time.sleep(1)
122 |             indx += 1
123 | 
124 |         
125 |         self.driver.quit()
126 |         print("[INFO] Getty search ended")
127 |         return image_urls
128 | 
129 |     def save_images(self,image_urls):
130 |         #save images into file directory
131 |         """
132 |             This function takes in an array of image urls and save it into the prescribed image path/directory.
133 |             Example:
134 |                 getty_image_scraper = GettyImageScraper("webdriver_path","image_path","search_key",number_of_photos)
135 |                 image_urls=["https://example_1.jpg","https://example_2.jpg"]
136 |                 getty_image_scraper.save_images(image_urls)
137 |                 
138 |         """
139 |         print("[INFO] Saving Image... Please wait...")
140 |         for indx,image_url in enumerate(image_urls):
141 |             try:
142 |                 print("[INFO] Image url:%s"%(image_url))
143 |                 search_string = ''.join(e for e in self.search_key if e.isalnum())
144 |                 image = requests.get(image_url,timeout=5)
145 |                 if image.status_code == 200:
146 |                     with Image.open(io.BytesIO(image.content)) as image_from_web:
147 |                         try:
148 |                             # filename = "%s%s.%s"%(search_string,str(indx),image_from_web.format.lower())
149 |                             filename = "%s%s.%s"%(search_string,str(indx),'png')
150 |                             image_path = os.path.join(self.image_path, filename)
151 |                             print("[INFO] %d .Image saved at: %s"%(indx,image_path))
152 |                             image_from_web.save(image_path)
153 |                         except OSError:
154 |                             rgb_im = image_from_web.convert('RGB')
155 |                             rgb_im.save(image_path)
156 |                         image_resolution = image_from_web.size
157 |                         if image_resolution != None:
158 |                             if image_resolution[0]<self.min_resolution[0] or image_resolution[1]<self.min_resolution[1] or image_resolution[0]>self.max_resolution[0] or image_resolution[1]>self.max_resolution[1]:
159 |                                 image_from_web.close()
160 |                                 #print("GoogleImageScraper Notification: %s did not meet resolution requirements."%(image_url))
161 |                                 os.remove(image_path)
162 | 
163 |                         image_from_web.close()
164 |             except Exception as e:
165 |                 print("[ERROR] Failed to be downloaded",e)
166 |                 pass
167 |         print("[INFO] Download Completed. Please note that some photos are not downloaded as it is not in the right format (e.g. jpg, jpeg, png)")


--------------------------------------------------------------------------------
/GoogleImageScrapper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sat Jul 18 13:01:02 2020
  4 | 
  5 | @author: OHyic
  6 | """
  7 | #import selenium drivers
  8 | from selenium import webdriver
  9 | from selenium.webdriver.chrome.options import Options
 10 | from selenium.webdriver.common.by import By
 11 | from selenium.webdriver.support.ui import WebDriverWait
 12 | from selenium.webdriver.support import expected_conditions as EC
 13 | from selenium.common.exceptions import NoSuchElementException       
 14 | 
 15 | #import helper libraries
 16 | import time
 17 | import urllib.request
 18 | import os
 19 | import requests
 20 | import io
 21 | from PIL import Image
 22 | 
 23 | #custom patch libraries
 24 | import patch 
 25 | 
 26 | class GoogleImageScraper():
 27 |     def __init__(self,webdriver_path,image_path, search_key="cat",number_of_images=1,headless=False,min_resolution=(0,0),max_resolution=(1920,1080)):
 28 |         #check parameter types
 29 |         image_path = os.path.join(image_path, search_key)
 30 |         if (type(number_of_images)!=int):
 31 |             print("[Error] Number of images must be integer value.")
 32 |             return
 33 |         if not os.path.exists(image_path):
 34 |             print("[INFO] Image path not found. Creating a new folder.")
 35 |             os.makedirs(image_path)
 36 |         #check if chromedriver is updated
 37 |         while(True):
 38 |             try:
 39 |                 #try going to www.google.com
 40 |                 options = Options()
 41 |                 if(headless):
 42 |                     options.add_argument('--headless')
 43 |                 driver = webdriver.Chrome(webdriver_path, chrome_options=options)
 44 |                 driver.set_window_size(1400,1050)
 45 |                 driver.get("https://www.google.com")
 46 |                 break
 47 |             except:
 48 |                 #patch chromedriver if not available or outdated
 49 |                 try:
 50 |                     driver
 51 |                 except NameError:
 52 |                     is_patched = patch.download_lastest_chromedriver()
 53 |                 else:
 54 |                     is_patched = patch.download_lastest_chromedriver(driver.capabilities['version'])
 55 |                 if (not is_patched): 
 56 |                     exit("[ERR] Please update the chromedriver.exe in the webdriver folder according to your chrome version:https://chromedriver.chromium.org/downloads")
 57 |                     
 58 |         self.driver = driver
 59 |         self.search_key = search_key
 60 |         self.number_of_images = number_of_images
 61 |         self.webdriver_path = webdriver_path
 62 |         self.image_path = image_path
 63 |         # self.url = "https://www.google.com/search?q=%s&source=lnms&tbm=isch&sa=X&ved=2ahUKEwie44_AnqLpAhUhBWMBHUFGD90Q_AUoAXoECBUQAw&biw=1920&bih=947"%(search_key) # Searching by keyword
 64 |         self.url = "https://www.google.com/search?hl=en-SG&tbs=simg:CAESmAIJPN_15ByxqerwajAILELCMpwgaOwo5CAQSFKE2gT-OMJAOtSatN9ERsyzFC78PGht_1ikShZ6PCQ6a88MgE38APW9L2iAFerzeGxTggBTAEDAsQjq7-CBoKCggIARIE0o3PGgwLEJ3twQkaqwEKGQoHbGVpc3VyZdqliPYDCgoIL20vMDRnM3IKJgoSbmF2aWdhdGlvbiBjaGFubmVs2qWI9gMMCgovbS8wY245cDAyCicKE21hcmluZSBhcmNoaXRlY3R1cmXapYj2AwwKCi9tLzAyODBmM18KJAoQbmF2aWdhdGlvbiBjYW5hbNqliPYDDAoKL20vMGNuOXAxMQoXCgViYXJnZdqliPYDCgoIL20vMDFidG4M&sxsrf=ALiCzsZchEwD4sOzSvSxdXZo0NEm2trZ-Q:1654564208185&q=leisure&tbm=isch&source=iu&ictx=1&vet=1&fir=s20go46GGTIFeM%252CTCrJrEtQ54UoUM%252C_%253B6JHjiKQtMbyfiM%252CHiidN4C9K1Mp8M%252C_%253BgflYOi8-hHJzjM%252CCKH9LIY0Y5sRIM%252C_%253BbK8QKedfVsrMEM%252CCnUtPjCOm4eNQM%252C_%253Bu369Z9Aij-NY2M%252CnqmTiYuhZSXipM%252C_%253B8oLRnitJ8fv4rM%252C9-ryFNCHoNl0oM%252C_%253B9CJC_OmFN9ppgM%252Ci5I5xK1uAyAdGM%252C_%253BG9JT2nqOa-tYbM%252CHUtPclSCY8RkfM%252C_%253BRvAHvVfGtCKjqM%252Cc-tpwjk-JEcGUM%252C_%253BD_G99e8hbwozLM%252CC5k6BUjPrvLWoM%252C_%253BjgA2TcZAR_g49M%252C02oZ3tYjywzWmM%252C_%253Bz5g9t5Nz07Kz7M%252CNhJRGORWGsaYmM%252C_%253BovTS6MZw9ftClM%252CtuHhR8NNv5cqfM%252C_%253BKdT9Ld7GemZxWM%252C5HvrmRxgS_hZeM%252C_%253Bty2MBVYOUAk8LM%252Croql3TtxpmSXQM%252C_%253B1oYrvVazcCvaoM%252CTxfFAssWRs4GzM%252C_%253BBAsAbCFW1QHxMM%252CwZJ9oE3NvWtNlM%252C_%253Bbl3YNfZClRnUMM%252CxC151u5NFpJEJM%252C_%253BqpJhApW8NOdryM%252CFyufiWK0p6hMvM%252C_%253BsTy2uMghOq30CM%252CHhHPj5e1CGhPQM%252C_&usg=AI4_-kQWxQ668wqxnZ4YGZtqHHmc7J6sdA&sa=X&ved=2ahUKEwi994v_k5r4AhVF7XMBHWf1A0kQ9QF6BAgKEAE&biw=1536&bih=713&dpr=2.5#imgrc=s20go46GGTIFeM" # Searching by url directly (searching by image on google and copy url of results page)
 65 |         self.headless=headless
 66 |         self.min_resolution = min_resolution
 67 |         self.max_resolution = max_resolution
 68 |         
 69 |     def find_image_urls(self):
 70 |         """
 71 |             This function search and return a list of image urls based on the search key.
 72 |             Example:
 73 |                 google_image_scraper = GoogleImageScraper("webdriver_path","image_path","search_key",number_of_photos)
 74 |                 image_urls = google_image_scraper.find_image_urls()
 75 |                 
 76 |         """
 77 |         print("[INFO] Scraping for image link... Please wait.")
 78 |         image_urls=[]
 79 |         count = 0
 80 |         missed_count = 0
 81 |         self.driver.get(self.url)
 82 |         time.sleep(3)
 83 |         indx = 1
 84 |         while self.number_of_images > count:
 85 |             try:
 86 |                 #find and click image
 87 |                 imgurl = self.driver.find_element_by_xpath('//*[@id="islrg"]/div[1]/div[%s]/a[1]/div[1]/img'%(str(indx)))
 88 |                 imgurl.click()
 89 |                 missed_count = 0 
 90 |             except Exception:
 91 |                 #print("[-] Unable to click this photo.")
 92 |                 missed_count = missed_count + 1
 93 |                 if (missed_count>10):
 94 |                     print("[INFO] No more photos.")
 95 |                     break
 96 |                  
 97 |             try:
 98 |                 #select image from the popup
 99 |                 time.sleep(1)
100 |                 class_names = ["n3VNCb"]
101 |                 images = [self.driver.find_elements_by_class_name(class_name) for class_name in class_names if len(self.driver.find_elements_by_class_name(class_name)) != 0 ][0]
102 |                 for image in images:
103 |                     #only download images that starts with http
104 |                     src_link = image.get_attribute("src")
105 |                     if(("http" in  src_link) and (not "encrypted" in src_link)):
106 |                         print("[INFO] %d. %s"%(count,src_link))
107 |                         image_urls.append(src_link)
108 |                         count +=1
109 |                         break
110 |             except Exception:
111 |                 print("[INFO] Unable to get link")   
112 |                 
113 |             try:
114 |                 #scroll page to load next image
115 |                 if(count%3==0):
116 |                     self.driver.execute_script("window.scrollTo(0, "+str(indx*60)+");")
117 |                 element = self.driver.find_element_by_class_name("mye4qd")
118 |                 element.click()
119 |                 print("[INFO] Loading more photos")
120 |                 time.sleep(3)
121 |             except Exception:  
122 |                 time.sleep(1)
123 |             indx += 1
124 | 
125 |         
126 |         self.driver.quit()
127 |         print("[INFO] Google search ended")
128 |         return image_urls
129 | 
130 |     def save_images(self,image_urls):
131 |         #save images into file directory
132 |         """
133 |             This function takes in an array of image urls and save it into the prescribed image path/directory.
134 |             Example:
135 |                 google_image_scraper = GoogleImageScraper("webdriver_path","image_path","search_key",number_of_photos)
136 |                 image_urls=["https://example_1.jpg","https://example_2.jpg"]
137 |                 google_image_scraper.save_images(image_urls)
138 |                 
139 |         """
140 |         print("[INFO] Saving Image... Please wait...")
141 |         for indx,image_url in enumerate(image_urls):
142 |             try:
143 |                 print("[INFO] Image url:%s"%(image_url))
144 |                 search_string = ''.join(e for e in self.search_key if e.isalnum())
145 |                 image = requests.get(image_url,timeout=5)
146 |                 if image.status_code == 200:
147 |                     with Image.open(io.BytesIO(image.content)) as image_from_web:
148 |                         try:
149 |                             # filename = "%s%s.%s"%(search_string,str(indx),image_from_web.format.lower())
150 |                             filename = "%s%s.%s"%(search_string,str(indx),'png')
151 |                             image_path = os.path.join(self.image_path, filename)
152 |                             print("[INFO] %d .Image saved at: %s"%(indx,image_path))
153 |                             image_from_web.save(image_path)
154 |                         except OSError:
155 |                             rgb_im = image_from_web.convert('RGB')
156 |                             rgb_im.save(image_path)
157 |                         image_resolution = image_from_web.size
158 |                         if image_resolution != None:
159 |                             if image_resolution[0]<self.min_resolution[0] or image_resolution[1]<self.min_resolution[1] or image_resolution[0]>self.max_resolution[0] or image_resolution[1]>self.max_resolution[1]:
160 |                                 image_from_web.close()
161 |                                 #print("GoogleImageScraper Notification: %s did not meet resolution requirements."%(image_url))
162 |                                 os.remove(image_path)
163 | 
164 |                         image_from_web.close()
165 |             except Exception as e:
166 |                 print("[ERROR] Failed to be downloaded",e)
167 |                 pass
168 |         print("[INFO] Download Completed. Please note that some photos are not downloaded as it is not in the right format (e.g. jpg, jpeg, png)")
169 | 


--------------------------------------------------------------------------------
/Misc_tools.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "<h3>Google Image Scrapper for Juypter Notebook</h3>"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "from GoogleImageScrapper import GoogleImageScraper\n",
 18 |     "from patch import webdriver_executable\n",
 19 |     "\n",
 20 |     "webdriver_path = os.path.normpath(os.path.join(os.getcwd(), 'webdriver', webdriver_executable()))\n",
 21 |     "image_path = os.path.normpath(os.path.join(os.getcwd(), 'photos'))\n",
 22 |     "#add new search key into array [\"cat\",\"t-shirt\",\"apple\",\"orange\",\"pear\",\"fish\"]\n",
 23 |     "search_keys= [\"cat\",\"t-shirt\"]\n",
 24 |     "number_of_images = 20\n",
 25 |     "headless = False\n",
 26 |     "#min_resolution = (width,height)\n",
 27 |     "min_resolution=(0,0)\n",
 28 |     "#max_resolution = (width,height)\n",
 29 |     "max_resolution=(1920,1080)\n",
 30 |     "for search_key in search_keys:\n",
 31 |     "    image_scrapper = GoogleImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)\n",
 32 |     "    image_urls = image_scrapper.find_image_urls()\n",
 33 |     "    image_scrapper.save_images(image_urls)\n"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "import os\n",
 43 |     "import cv2\n",
 44 |     "folder = 'photos/chinese cargo boat/'\n",
 45 |     "for i in os.listdir(folder):\n",
 46 |     "    new_name = 'new' + i\n",
 47 |     "    print(\"Old image file: \",folder + i)\n",
 48 |     "    os.rename((folder+i), (folder+new_name))\n",
 49 |     "    print(\"changed to\")\n",
 50 |     "    print(\"New image file:\",folder + new_name)\n"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "markdown",
 55 |    "metadata": {},
 56 |    "source": [
 57 |     "## Extracting Video to Image Frames"
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "code",
 62 |    "execution_count": null,
 63 |    "metadata": {},
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "import os\n",
 67 |     "import cv2\n",
 68 |     "\n",
 69 |     "root_folder = \"Bridge\"\n",
 70 |     "sub_folders = [\"bridge\", \"gantry\"]\n",
 71 |     "categories = [\"on_land\", \"on_air\"]\n",
 72 |     "save_folder = \"video_frames\"\n",
 73 |     "\n",
 74 |     "for sub_folder in  sub_folders:\n",
 75 |     "  current_folder = os.path.join(root_folder,sub_folder)\n",
 76 |     "  for category in categories:\n",
 77 |     "    current_subfolder = os.path.join(current_folder, category)\n",
 78 |     "    for video in os.listdir(current_subfolder):\n",
 79 |     "      current_video_name = os.path.splitext(video)[0]\n",
 80 |     "      save_path = os.path.join(save_folder,sub_folder,category,current_video_name)\n",
 81 |     "      if not os.path.exists(save_path):\n",
 82 |     "        os.makedirs(save_path)\n",
 83 |     "      print(f\"Current video: {os.path.join(current_subfolder, video)}\")\n",
 84 |     "      vidcap = cv2.VideoCapture(os.path.join(current_subfolder, video))\n",
 85 |     "      success,image = vidcap.read()\n",
 86 |     "      count = 0\n",
 87 |     "      print(success)\n",
 88 |     "      if success:\n",
 89 |     "        print(f\"Now extracting from {video} in {current_subfolder}. \\n\")\n",
 90 |     "        while success:\n",
 91 |     "          vidcap.set(cv2.CAP_PROP_POS_MSEC,(count*1000))\n",
 92 |     "          success,image = vidcap.read()\n",
 93 |     "          # cv2.imwrite(\"frame%d.jpg\" % count, image)     # save frame as JPEG file      \n",
 94 |     "          # success,image = vidcap.read()\n",
 95 |     "          print('Reading frame: ', count)\n",
 96 |     "          # print(os.path.join(save_path, f\"{current_video_name}_{count}.png\"))\n",
 97 |     "          try:\n",
 98 |     "            cv2.imwrite( os.path.join(save_path, f\"{current_video_name}_{count}.png\"), image)\n",
 99 |     "            print(f\"Current image save as {current_video_name}_{count}.png \")\n",
100 |     "            count += 1\n",
101 |     "          except:\n",
102 |     "            print(f\"All frames extracted from {current_video_name}.\")\n",
103 |     "            break"
104 |    ]
105 |   }
106 |  ],
107 |  "metadata": {
108 |   "interpreter": {
109 |    "hash": "1a78bb8717b0d234854bf9b5d9ed5c93eec43459027a18bd8e8fd1e4b3bd3ecb"
110 |   },
111 |   "kernelspec": {
112 |    "display_name": "Python 3.8.8 ('imagescraper')",
113 |    "language": "python",
114 |    "name": "python3"
115 |   },
116 |   "language_info": {
117 |    "codemirror_mode": {
118 |     "name": "ipython",
119 |     "version": 3
120 |    },
121 |    "file_extension": ".py",
122 |    "mimetype": "text/x-python",
123 |    "name": "python",
124 |    "nbconvert_exporter": "python",
125 |    "pygments_lexer": "ipython3",
126 |    "version": "3.8.8"
127 |   }
128 |  },
129 |  "nbformat": 4,
130 |  "nbformat_minor": 4
131 | }
132 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Image Scraper
 2 | A library to scrape images from websites like Google, Getty and many more in the future.
 3 | 
 4 | ## Pre-requisites:
 5 | 1. conda create --name imagescraper python==3.8.8
 6 | 2. pip install -r requirements.txt
 7 | 3. Download Google Chrome 
 8 | 4. Download Google Webdriver based on your Chrome version (See Setup below for more info)
 9 | 
10 | ## Setup:
11 | 1. Open cmd
12 | 2. Clone the repository (or [download](https://github.com/JJLimmm/Google-Image-Scraper/archive/refs/heads/master.zip))
13 |     ```
14 |     git clone https://github.com/JJLimmm/Google-Image-Scraper
15 |     ```
16 | 3. Install Dependencies
17 |     ```
18 |     pip install -r requirements.txt
19 |     ```  
20 | 4. Download the Chrome Webdriver
21 |     - Download from [here](https://chromedriver.chromium.org/downloads)  
22 | 5. Change certain configs in main.py
23 |     - **line 21** website_list[index] for the website you want to scrape from
24 |     - **line 24** to add in the names of different objects you want to find
25 |     - **line 27** for the number of images you want to scrape
26 | 6. Run the code
27 |     ```
28 |     python main.py
29 |     ```
30 | 
31 | ## Usage:
32 | ```python
33 | #Import libraries (Import in other website scrapers in the future)
34 | from GoogleImageScrapper import GoogleImageScraper
35 | from GettyImagesScrapper import GettyImageScraper
36 | import os
37 | from patch import webdriver_executable
38 | 
39 | #Define file path (Don't change)
40 | webdriver_path = os.path.normpath(os.path.join(os.getcwd(), 'webdriver', webdriver_executable()))
41 | image_path = os.path.normpath(os.path.join(os.getcwd(), 'photos'))
42 | 
43 | #Website used for scraping: 
44 | website_list = ['google', 'getty', 'shutterstock', 'bing']
45 | search_site = website_list[1] #change index number here to select the website you are using
46 | 
47 | #Add new search key into array ["cat","t-shirt","apple","orange","pear","fish"]
48 | search_keys= ["cat","t-shirt"]
49 | 
50 | #Parameters
51 | number_of_images = 10
52 | headless = True
53 | min_resolution=(0,0)
54 | max_resolution=(1920,1080)
55 | 
56 | #Main program
57 | #Choose if using Google or Getty Images Scrapper (or add in other options next time)
58 | for search_key in search_keys:
59 |     if search_site == 'google':
60 |         image_scrapper = GoogleImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
61 |     if search_site == 'getty':
62 |         image_scrapper = GettyImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
63 |     if search_site == 'shutterstock':
64 |         image_scrapper = ShutterstockImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
65 |     if search_site == 'bing':
66 |         image_scrapper = BingImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
67 |     image_urls = image_scrapper.find_image_urls()
68 |     image_scrapper.save_images(image_urls)
69 | 
70 | #Release resources    
71 | del image_scrapper
72 | 
73 | ```  
74 | ## Development Roadmap:  
75 | [x] Add Scraping from Getty Images  
76 | [ ] Add scraping for shutterstock and bing (**_In-Progress_**)  
77 | [ ] Streamline all website scrapers into one script (Code Refactoring)  
78 | [ ] Support for other website browsers (Firefox, Edge)  
79 | [ ] Add in support for multiple image formats (e.g: jpg, png, jpeg) and reformat non-conventional image formats (webp, etc...)  
80 | 
81 | ## Youtube Video:
82 | [![IMAGE ALT TEXT](youtube_thumbnail.PNG)](https://youtu.be/QZn_ZxpsIw4 "Google Image Scraper")  
83 | *Credits to ohyicong's initial [Google Image Scraper](https://github.com/ohyicong/Google-Image-Scraper.git)*  
84 | 
85 | =======
86 | 
87 | 


--------------------------------------------------------------------------------
/ShutterstockImagesScrapper.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Tues May 31 11:48:02 2022
  4 | 
  5 | @author: JJLimmm
  6 | """
  7 | #import selenium drivers
  8 | from selenium import webdriver
  9 | from selenium.webdriver.chrome.options import Options
 10 | from selenium.webdriver.common.by import By
 11 | from selenium.webdriver.support.ui import WebDriverWait
 12 | from selenium.webdriver.support import expected_conditions as EC
 13 | from selenium.common.exceptions import NoSuchElementException       
 14 | 
 15 | #import helper libraries
 16 | import time
 17 | import urllib.request
 18 | import os
 19 | import requests
 20 | import io
 21 | from PIL import Image
 22 | 
 23 | #custom patch libraries
 24 | import patch 
 25 | 
 26 | class ShutterstockImageScraper():
 27 |     def __init__(self,webdriver_path,image_path, search_key="cat",number_of_images=1,headless=False,min_resolution=(0,0),max_resolution=(1920,1080)):
 28 |         #check parameter types
 29 |         image_path = os.path.join(image_path, search_key)
 30 |         if (type(number_of_images)!=int):
 31 |             print("[Error] Number of images must be integer value.")
 32 |             return
 33 |         if not os.path.exists(image_path):
 34 |             print("[INFO] Image path not found. Creating a new folder.")
 35 |             os.makedirs(image_path)
 36 |         #check if chromedriver is updated
 37 |         while(True):
 38 |             try:
 39 |                 #try going to www.google.com
 40 |                 options = Options()
 41 |                 if(headless):
 42 |                     options.add_argument('--headless')
 43 |                 driver = webdriver.Chrome(webdriver_path, chrome_options=options)
 44 |                 driver.set_window_size(1400,1050)
 45 |                 driver.get("https://www.google.com")
 46 |                 break
 47 |             except:
 48 |                 #patch chromedriver if not available or outdated
 49 |                 try:
 50 |                     driver
 51 |                 except NameError:
 52 |                     is_patched = patch.download_lastest_chromedriver()
 53 |                 else:
 54 |                     is_patched = patch.download_lastest_chromedriver(driver.capabilities['version'])
 55 |                 if (not is_patched): 
 56 |                     exit("[ERR] Please update the chromedriver.exe in the webdriver folder according to your chrome version:https://chromedriver.chromium.org/downloads")
 57 |                     
 58 |         self.driver = driver
 59 |         self.search_key = search_key
 60 |         self.number_of_images = number_of_images
 61 |         self.webdriver_path = webdriver_path
 62 |         self.image_path = image_path
 63 |         self.url = "https://www.shutterstock.com/search/%s?language=en&image_type=photo&sort=popular&page="%(search_key)
 64 |         self.headless=headless
 65 |         self.min_resolution = min_resolution
 66 |         self.max_resolution = max_resolution
 67 |         
 68 |     def find_image_urls(self):
 69 |         """
 70 |             This function search and return a list of image urls based on the search key.
 71 |             Example:
 72 |                 shutterstock_image_scraper = ShutterstockImageScraper("webdriver_path","image_path","search_key",number_of_photos)
 73 |                 image_urls = shutterstock_image_scraper.find_image_urls()
 74 |                 
 75 |         """
 76 |         print("[INFO] Scraping for image link... Please wait.")
 77 |         image_urls=[]
 78 |         count = 0
 79 |         missed_count = 0
 80 |         self.driver.get(self.url)
 81 |         time.sleep(3)
 82 |         indx = 1
 83 |         while self.number_of_images > count:
 84 |             try:
 85 |                 #find and click image
 86 |                 #TODO: Retrieving of img url
 87 |                 imgurl = self.driver.find_element_by_xpath("//*[@class='GalleryItems-module__searchContent___DbMmK']/div[%s]/article[1]/a[1]/figure[1]/picture[1]/img"%(str(indx)))
 88 |                 src_link = imgurl.get_attribute('src')
 89 |                 # imgurl.click()
 90 |                 missed_count = 0 
 91 |             except Exception:
 92 |                 #print("[-] Unable to click this photo.")
 93 |                 missed_count = missed_count + 1
 94 |                 if (missed_count>10):
 95 |                     print("[INFO] No more photos.")
 96 |                     break
 97 |                  
 98 |             try:
 99 |                 #select image from the popup
100 |                 time.sleep(1)
101 |                 if(("http" in  src_link) and (not "encrypted" in src_link)):
102 |                     print("[INFO] %d. %s"%(count,src_link))
103 |                     image_urls.append(src_link)
104 |                     count +=1
105 |             except Exception:
106 |                 print("[INFO] Unable to get link")   
107 |                 
108 |             try:
109 |                 #TODO: scroll page to load next image for scraping
110 |                 if(count%70==0):
111 |                     # self.driver.execute_script("window.scrollTo(0, "+str(indx*60)+");")
112 |                     element = self.driver.find_element_by_class_name("PaginationRow-module__buttonText___XM2mA")
113 |                     element.click()
114 |                     indx = 0
115 |                     print("[INFO] Loading more photos")
116 |                     time.sleep(5)
117 |                     
118 |             except Exception:  
119 |                 time.sleep(1)
120 |             indx += 1
121 | 
122 |         
123 |         self.driver.quit()
124 |         print("[INFO] shutterstock search ended")
125 |         return image_urls
126 | 
127 |     def save_images(self,image_urls):
128 |         #save images into file directory
129 |         """
130 |             This function takes in an array of image urls and save it into the prescribed image path/directory.
131 |             Example:
132 |                 shutterstock_image_scraper = ShutterstockImageScraper("webdriver_path","image_path","search_key",number_of_photos)
133 |                 image_urls=["https://example_1.jpg","https://example_2.jpg"]
134 |                 shutterstock_image_scraper.save_images(image_urls)
135 |                 
136 |         """
137 |         print("[INFO] Saving Image... Please wait...")
138 |         for indx,image_url in enumerate(image_urls):
139 |             try:
140 |                 print("[INFO] Image url:%s"%(image_url))
141 |                 search_string = ''.join(e for e in self.search_key if e.isalnum())
142 |                 image = requests.get(image_url,timeout=5)
143 |                 if image.status_code == 200:
144 |                     with Image.open(io.BytesIO(image.content)) as image_from_web:
145 |                         try:
146 |                             # filename = "%s%s.%s"%(search_string,str(indx),image_from_web.format.lower())
147 |                             filename = "%s%s.%s"%(search_string,str(indx),'png')
148 |                             image_path = os.path.join(self.image_path, filename)
149 |                             print("[INFO] %d .Image saved at: %s"%(indx,image_path))
150 |                             image_from_web.save(image_path)
151 |                         except OSError:
152 |                             rgb_im = image_from_web.convert('RGB')
153 |                             rgb_im.save(image_path)
154 |                         image_resolution = image_from_web.size
155 |                         if image_resolution != None:
156 |                             if image_resolution[0]<self.min_resolution[0] or image_resolution[1]<self.min_resolution[1] or image_resolution[0]>self.max_resolution[0] or image_resolution[1]>self.max_resolution[1]:
157 |                                 image_from_web.close()
158 |                                 os.remove(image_path)
159 | 
160 |                         image_from_web.close()
161 |             except Exception as e:
162 |                 print("[ERROR] Failed to be downloaded",e)
163 |                 pass
164 |         print("[INFO] Download Completed. Please note that some photos are not downloaded as it is not in the right format (e.g. jpg, jpeg, png)")


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
  1 | name: imagescraper
  2 | channels:
  3 |   - nvidia
  4 |   - defaults
  5 | dependencies:
  6 |   - _ipyw_jlab_nb_ext_conf=0.1.0=py38_0
  7 |   - _libgcc_mutex=0.1=main
  8 |   - _openmp_mutex=4.5=1_gnu
  9 |   - alabaster=0.7.12=pyhd3eb1b0_0
 10 |   - anaconda-client=1.9.0=py38h06a4308_0
 11 |   - anaconda-project=0.10.2=pyhd3eb1b0_0
 12 |   - anyio=3.5.0=py38h06a4308_0
 13 |   - appdirs=1.4.4=pyhd3eb1b0_0
 14 |   - argh=0.26.2=py38_0
 15 |   - argon2-cffi=21.3.0=pyhd3eb1b0_0
 16 |   - argon2-cffi-bindings=21.2.0=py38h7f8727e_0
 17 |   - arrow=0.13.1=py38_0
 18 |   - asn1crypto=1.4.0=py_0
 19 |   - astroid=2.6.6=py38h06a4308_0
 20 |   - astropy=5.0=py38h09021b7_0
 21 |   - async_generator=1.10=pyhd3eb1b0_0
 22 |   - atomicwrites=1.4.0=py_0
 23 |   - attrs=21.4.0=pyhd3eb1b0_0
 24 |   - autopep8=1.6.0=pyhd3eb1b0_0
 25 |   - babel=2.9.1=pyhd3eb1b0_0
 26 |   - backcall=0.2.0=pyhd3eb1b0_0
 27 |   - backports=1.1=pyhd3eb1b0_0
 28 |   - backports.functools_lru_cache=1.6.4=pyhd3eb1b0_0
 29 |   - backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3
 30 |   - backports.tempfile=1.0=pyhd3eb1b0_1
 31 |   - backports.weakref=1.0.post1=py_1
 32 |   - beautifulsoup4=4.10.0=pyh06a4308_0
 33 |   - binaryornot=0.4.4=pyhd3eb1b0_1
 34 |   - bitarray=2.3.5=py38h7f8727e_0
 35 |   - bkcharts=0.2=py38_0
 36 |   - black=19.10b0=py_0
 37 |   - blas=1.0=mkl
 38 |   - bleach=4.1.0=pyhd3eb1b0_0
 39 |   - blosc=1.21.0=h8c45485_0
 40 |   - bokeh=2.4.2=py38h06a4308_0
 41 |   - boto=2.49.0=py38_0
 42 |   - bottleneck=1.3.2=py38heb32a55_1
 43 |   - brotli=1.0.9=he6710b0_2
 44 |   - brotlipy=0.7.0=py38h27cfd23_1003
 45 |   - brunsli=0.1=h2531618_0
 46 |   - bzip2=1.0.8=h7b6447c_0
 47 |   - c-ares=1.18.1=h7f8727e_0
 48 |   - ca-certificates=2021.10.26=h06a4308_2
 49 |   - cairo=1.16.0=hf32fb01_1
 50 |   - cffi=1.15.0=py38hd667e15_1
 51 |   - cfitsio=3.470=hf0d0db6_6
 52 |   - charls=2.2.0=h2531618_0
 53 |   - charset-normalizer=2.0.4=pyhd3eb1b0_0
 54 |   - click=8.0.3=pyhd3eb1b0_0
 55 |   - cloudpickle=2.0.0=pyhd3eb1b0_0
 56 |   - clyent=1.2.2=py38_1
 57 |   - colorama=0.4.4=pyhd3eb1b0_0
 58 |   - conda-content-trust=0.1.1=pyhd3eb1b0_0
 59 |   - conda-pack=0.6.0=pyhd3eb1b0_0
 60 |   - conda-package-handling=1.7.3=py38h27cfd23_1
 61 |   - conda-repo-cli=1.0.4=pyhd3eb1b0_0
 62 |   - conda-verify=3.4.2=py_1
 63 |   - contextlib2=0.6.0.post1=pyhd3eb1b0_0
 64 |   - cookiecutter=1.7.2=pyhd3eb1b0_0
 65 |   - cudatoolkit=11.1.74=h6bb024c_0
 66 |   - curl=7.80.0=h7f8727e_0
 67 |   - cycler=0.11.0=pyhd3eb1b0_0
 68 |   - cython=0.29.25=py38hdbfa776_0
 69 |   - cytoolz=0.11.0=py38h7b6447c_0
 70 |   - daal4py=2021.5.0=py38h78b71dc_0
 71 |   - dal=2021.5.1=h06a4308_803
 72 |   - dask=2021.10.0=pyhd3eb1b0_0
 73 |   - dask-core=2021.10.0=pyhd3eb1b0_0
 74 |   - dataclasses=0.8=pyh6d0b6a4_7
 75 |   - dbus=1.13.18=hb2f20db_0
 76 |   - debugpy=1.5.1=py38h295c915_0
 77 |   - decorator=5.1.1=pyhd3eb1b0_0
 78 |   - defusedxml=0.7.1=pyhd3eb1b0_0
 79 |   - diff-match-patch=20200713=pyhd3eb1b0_0
 80 |   - distributed=2021.10.0=py38h06a4308_0
 81 |   - docutils=0.17.1=py38h06a4308_1
 82 |   - entrypoints=0.3=py38_0
 83 |   - et_xmlfile=1.1.0=py38h06a4308_0
 84 |   - expat=2.4.4=h295c915_0
 85 |   - fastcache=1.1.0=py38h7b6447c_0
 86 |   - filelock=3.4.2=pyhd3eb1b0_0
 87 |   - flake8=3.9.2=pyhd3eb1b0_0
 88 |   - flask=1.1.2=pyhd3eb1b0_0
 89 |   - fontconfig=2.13.1=h6c09931_0
 90 |   - fonttools=4.25.0=pyhd3eb1b0_0
 91 |   - freetype=2.11.0=h70c0345_0
 92 |   - fribidi=1.0.10=h7b6447c_0
 93 |   - fsspec=2022.1.0=pyhd3eb1b0_0
 94 |   - future=0.18.2=py38_1
 95 |   - get_terminal_size=1.0.0=haa9412d_0
 96 |   - gevent=21.8.0=py38h7f8727e_1
 97 |   - giflib=5.2.1=h7b6447c_0
 98 |   - glib=2.69.1=h4ff587b_1
 99 |   - glob2=0.7=pyhd3eb1b0_0
100 |   - gmp=6.2.1=h2531618_2
101 |   - gmpy2=2.1.2=py38heeb90bb_0
102 |   - graphite2=1.3.14=h23475e2_0
103 |   - greenlet=1.1.1=py38h295c915_0
104 |   - gst-plugins-base=1.14.0=h8213a91_2
105 |   - gstreamer=1.14.0=h28cd5cc_2
106 |   - h5py=2.10.0=py38h7918eee_0
107 |   - harfbuzz=2.8.1=h6f93f22_0
108 |   - hdf5=1.10.4=hb1b8bf9_0
109 |   - heapdict=1.0.1=pyhd3eb1b0_0
110 |   - html5lib=1.1=pyhd3eb1b0_0
111 |   - icu=58.2=he6710b0_3
112 |   - idna=3.3=pyhd3eb1b0_0
113 |   - imagecodecs=2021.8.26=py38h4cda21f_0
114 |   - imageio=2.9.0=pyhd3eb1b0_0
115 |   - imagesize=1.3.0=pyhd3eb1b0_0
116 |   - importlib-metadata=4.8.2=py38h06a4308_0
117 |   - importlib_metadata=4.8.2=hd3eb1b0_0
118 |   - inflection=0.5.1=py38h06a4308_0
119 |   - iniconfig=1.1.1=pyhd3eb1b0_0
120 |   - intel-openmp=2021.4.0=h06a4308_3561
121 |   - intervaltree=3.1.0=pyhd3eb1b0_0
122 |   - ipykernel=6.4.1=py38h06a4308_1
123 |   - ipython=7.31.1=py38h06a4308_0
124 |   - ipython_genutils=0.2.0=pyhd3eb1b0_1
125 |   - ipywidgets=7.6.5=pyhd3eb1b0_1
126 |   - isort=5.9.3=pyhd3eb1b0_0
127 |   - itsdangerous=2.0.1=pyhd3eb1b0_0
128 |   - jbig=2.1=hdba287a_0
129 |   - jdcal=1.4.1=pyhd3eb1b0_0
130 |   - jedi=0.18.1=py38h06a4308_1
131 |   - jeepney=0.7.1=pyhd3eb1b0_0
132 |   - jinja2=2.11.3=pyhd3eb1b0_0
133 |   - jinja2-time=0.2.0=pyhd3eb1b0_2
134 |   - joblib=1.1.0=pyhd3eb1b0_0
135 |   - jpeg=9d=h7f8727e_0
136 |   - json5=0.9.6=pyhd3eb1b0_0
137 |   - jsonschema=3.2.0=pyhd3eb1b0_2
138 |   - jupyter=1.0.0=py38_7
139 |   - jupyter_client=6.1.12=pyhd3eb1b0_0
140 |   - jupyter_console=6.4.0=pyhd3eb1b0_0
141 |   - jupyter_core=4.9.1=py38h06a4308_0
142 |   - jupyter_server=1.13.5=pyhd3eb1b0_0
143 |   - jupyterlab=3.2.9=pyhd3eb1b0_0
144 |   - jupyterlab_pygments=0.1.2=py_0
145 |   - jupyterlab_server=2.10.3=pyhd3eb1b0_1
146 |   - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
147 |   - jxrlib=1.1=h7b6447c_2
148 |   - keyring=23.4.0=py38h06a4308_0
149 |   - kiwisolver=1.3.2=py38h295c915_0
150 |   - krb5=1.19.2=hac12032_0
151 |   - lazy-object-proxy=1.6.0=py38h27cfd23_0
152 |   - lcms2=2.12=h3be6417_0
153 |   - ld_impl_linux-64=2.35.1=h7274673_9
154 |   - lerc=3.0=h295c915_0
155 |   - libaec=1.0.4=he6710b0_1
156 |   - libarchive=3.4.2=h62408e4_0
157 |   - libcurl=7.80.0=h0b77cf5_0
158 |   - libdeflate=1.8=h7f8727e_5
159 |   - libedit=3.1.20210910=h7f8727e_0
160 |   - libev=4.33=h7f8727e_1
161 |   - libffi=3.3=he6710b0_2
162 |   - libgcc-ng=9.3.0=h5101ec6_17
163 |   - libgfortran-ng=7.5.0=ha8ba4b0_17
164 |   - libgfortran4=7.5.0=ha8ba4b0_17
165 |   - libgomp=9.3.0=h5101ec6_17
166 |   - liblief=0.10.1=he6710b0_0
167 |   - libllvm11=11.1.0=h3826bc1_0
168 |   - libnghttp2=1.46.0=hce63b2e_0
169 |   - libpng=1.6.37=hbc83047_0
170 |   - libsodium=1.0.18=h7b6447c_0
171 |   - libspatialindex=1.9.3=h2531618_0
172 |   - libssh2=1.9.0=h1ba5d50_1
173 |   - libstdcxx-ng=9.3.0=hd4cf53a_17
174 |   - libtiff=4.2.0=h85742a9_0
175 |   - libtool=2.4.6=h295c915_1008
176 |   - libuuid=1.0.3=h7f8727e_2
177 |   - libuv=1.40.0=h7b6447c_0
178 |   - libwebp=1.2.0=h89dd481_0
179 |   - libwebp-base=1.2.0=h27cfd23_0
180 |   - libxcb=1.14=h7b6447c_0
181 |   - libxml2=2.9.12=h03d6c58_0
182 |   - libxslt=1.1.34=hc22bd24_0
183 |   - libzopfli=1.0.3=he6710b0_0
184 |   - llvmlite=0.37.0=py38h295c915_1
185 |   - locket=0.2.1=py38h06a4308_1
186 |   - lxml=4.7.1=py38h1f438cf_1
187 |   - lz4-c=1.9.3=h295c915_1
188 |   - lzo=2.10=h7b6447c_2
189 |   - markupsafe=1.1.1=py38h7b6447c_0
190 |   - matplotlib=3.5.1=py38h06a4308_0
191 |   - matplotlib-base=3.5.1=py38ha18d171_0
192 |   - matplotlib-inline=0.1.2=pyhd3eb1b0_2
193 |   - mccabe=0.6.1=py38_1
194 |   - mistune=0.8.4=py38h7b6447c_1000
195 |   - mkl=2021.4.0=h06a4308_640
196 |   - mkl-service=2.4.0=py38h7f8727e_0
197 |   - mkl_fft=1.3.1=py38hd3c417c_0
198 |   - mkl_random=1.2.2=py38h51133e4_0
199 |   - mock=4.0.3=pyhd3eb1b0_0
200 |   - more-itertools=8.12.0=pyhd3eb1b0_0
201 |   - mpc=1.1.0=h10f8cd9_1
202 |   - mpfr=4.0.2=hb69a4c5_1
203 |   - mpi=1.0=mpich
204 |   - mpich=3.3.2=hc856adb_0
205 |   - mpmath=1.2.1=py38h06a4308_0
206 |   - msgpack-python=1.0.2=py38hff7bd54_1
207 |   - multipledispatch=0.6.0=py38_0
208 |   - munkres=1.1.4=py_0
209 |   - mypy_extensions=0.4.3=py38h06a4308_1
210 |   - navigator-updater=0.2.1=py38_0
211 |   - nb_conda=2.2.1=py38_1
212 |   - nb_conda_kernels=2.3.1=py38h06a4308_0
213 |   - nbclassic=0.3.5=pyhd3eb1b0_0
214 |   - nbclient=0.5.11=pyhd3eb1b0_0
215 |   - nbconvert=6.3.0=py38h06a4308_0
216 |   - nbformat=5.1.3=pyhd3eb1b0_0
217 |   - ncurses=6.3=h7f8727e_2
218 |   - nest-asyncio=1.5.1=pyhd3eb1b0_0
219 |   - networkx=2.6.3=pyhd3eb1b0_0
220 |   - nltk=3.6.5=pyhd3eb1b0_0
221 |   - nose=1.3.7=pyhd3eb1b0_1008
222 |   - notebook=6.4.8=py38h06a4308_0
223 |   - numba=0.54.1=py38h51133e4_0
224 |   - numexpr=2.8.1=py38h6abb31d_0
225 |   - numpy=1.20.3=py38hf144106_0
226 |   - numpy-base=1.20.3=py38h74d4b33_0
227 |   - numpydoc=1.2=pyhd3eb1b0_0
228 |   - olefile=0.46=pyhd3eb1b0_0
229 |   - openjpeg=2.4.0=h3ad879b_0
230 |   - openpyxl=3.0.9=pyhd3eb1b0_0
231 |   - openssl=1.1.1m=h7f8727e_0
232 |   - packaging=21.3=pyhd3eb1b0_0
233 |   - pandas=1.4.1=py38h295c915_0
234 |   - pandocfilters=1.5.0=pyhd3eb1b0_0
235 |   - pango=1.45.3=hd140c19_0
236 |   - parso=0.8.3=pyhd3eb1b0_0
237 |   - partd=1.2.0=pyhd3eb1b0_0
238 |   - patchelf=0.13=h295c915_0
239 |   - path=16.2.0=pyhd3eb1b0_0
240 |   - path.py=12.5.0=hd3eb1b0_0
241 |   - pathlib2=2.3.6=py38h06a4308_2
242 |   - pathspec=0.7.0=py_0
243 |   - patsy=0.5.2=py38h06a4308_1
244 |   - pcre=8.45=h295c915_0
245 |   - pep8=1.7.1=py38_0
246 |   - pexpect=4.8.0=pyhd3eb1b0_3
247 |   - pickleshare=0.7.5=pyhd3eb1b0_1003
248 |   - pillow=8.4.0=py38h5aabda8_0
249 |   - pip=21.2.4=py38h06a4308_0
250 |   - pixman=0.40.0=h7f8727e_1
251 |   - pkginfo=1.8.2=pyhd3eb1b0_0
252 |   - pluggy=1.0.0=py38h06a4308_0
253 |   - ply=3.11=py38_0
254 |   - poyo=0.5.0=pyhd3eb1b0_0
255 |   - prometheus_client=0.13.1=pyhd3eb1b0_0
256 |   - prompt-toolkit=3.0.20=pyhd3eb1b0_0
257 |   - prompt_toolkit=3.0.20=hd3eb1b0_0
258 |   - psutil=5.8.0=py38h27cfd23_1
259 |   - ptyprocess=0.7.0=pyhd3eb1b0_2
260 |   - py=1.11.0=pyhd3eb1b0_0
261 |   - py-lief=0.10.1=py38h403a769_0
262 |   - pycodestyle=2.7.0=pyhd3eb1b0_0
263 |   - pycosat=0.6.3=py38h7b6447c_1
264 |   - pycparser=2.21=pyhd3eb1b0_0
265 |   - pycurl=7.44.1=py38h8f2d780_1
266 |   - pydocstyle=6.1.1=pyhd3eb1b0_0
267 |   - pyerfa=2.0.0=py38h27cfd23_0
268 |   - pyflakes=2.3.1=pyhd3eb1b0_0
269 |   - pygments=2.11.2=pyhd3eb1b0_0
270 |   - pylint=2.9.6=py38h06a4308_1
271 |   - pyls-spyder=0.4.0=pyhd3eb1b0_0
272 |   - pyodbc=4.0.32=py38h295c915_0
273 |   - pyopenssl=22.0.0=pyhd3eb1b0_0
274 |   - pyparsing=3.0.4=pyhd3eb1b0_0
275 |   - pyqt=5.9.2=py38h05f1152_4
276 |   - pyrsistent=0.18.0=py38heee7806_0
277 |   - pysocks=1.7.1=py38h06a4308_0
278 |   - pytables=3.6.1=py38h9fd0a39_0
279 |   - pytest=6.2.5=py38h06a4308_2
280 |   - python=3.8.8=hdb3f193_5
281 |   - python-dateutil=2.8.2=pyhd3eb1b0_0
282 |   - python-libarchive-c=2.9=pyhd3eb1b0_1
283 |   - python-lsp-black=1.0.0=pyhd3eb1b0_0
284 |   - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0
285 |   - python-lsp-server=1.2.4=pyhd3eb1b0_0
286 |   - python-slugify=5.0.2=pyhd3eb1b0_0
287 |   - pytz=2021.3=pyhd3eb1b0_0
288 |   - pywavelets=1.1.1=py38h7b6447c_2
289 |   - pyxdg=0.27=pyhd3eb1b0_0
290 |   - pyyaml=6.0=py38h7f8727e_1
291 |   - pyzmq=22.3.0=py38h295c915_2
292 |   - qdarkstyle=3.0.2=pyhd3eb1b0_0
293 |   - qstylizer=0.1.10=pyhd3eb1b0_0
294 |   - qt=5.9.7=h5867ecd_1
295 |   - qtawesome=1.0.3=pyhd3eb1b0_0
296 |   - qtconsole=5.2.2=pyhd3eb1b0_0
297 |   - qtpy=1.11.2=pyhd3eb1b0_0
298 |   - readline=8.1.2=h7f8727e_1
299 |   - regex=2021.11.2=py38h7f8727e_0
300 |   - requests=2.27.1=pyhd3eb1b0_0
301 |   - ripgrep=12.1.1=0
302 |   - rope=0.22.0=pyhd3eb1b0_0
303 |   - rtree=0.9.7=py38h06a4308_1
304 |   - ruamel_yaml=0.15.100=py38h27cfd23_0
305 |   - scikit-image=0.18.3=py38h51133e4_0
306 |   - scikit-learn=1.0.2=py38h51133e4_1
307 |   - scikit-learn-intelex=2021.5.0=py38h06a4308_0
308 |   - scipy=1.7.3=py38hc147768_0
309 |   - seaborn=0.11.2=pyhd3eb1b0_0
310 |   - secretstorage=3.3.1=py38h06a4308_0
311 |   - send2trash=1.8.0=pyhd3eb1b0_1
312 |   - setuptools=58.0.4=py38h06a4308_0
313 |   - simplegeneric=0.8.1=py38_2
314 |   - singledispatch=3.7.0=pyhd3eb1b0_1001
315 |   - sip=4.19.13=py38h295c915_0
316 |   - six=1.16.0=pyhd3eb1b0_1
317 |   - snappy=1.1.8=he6710b0_0
318 |   - sniffio=1.2.0=py38h06a4308_1
319 |   - snowballstemmer=2.2.0=pyhd3eb1b0_0
320 |   - sortedcollections=2.1.0=pyhd3eb1b0_0
321 |   - sortedcontainers=2.4.0=pyhd3eb1b0_0
322 |   - soupsieve=2.3.1=pyhd3eb1b0_0
323 |   - sphinx=4.4.0=pyhd3eb1b0_0
324 |   - sphinxcontrib=1.0=py38_1
325 |   - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0
326 |   - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0
327 |   - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0
328 |   - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0
329 |   - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0
330 |   - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0
331 |   - sphinxcontrib-websupport=1.2.4=py_0
332 |   - spyder=5.1.5=py38h06a4308_1
333 |   - spyder-kernels=2.1.3=py38h06a4308_0
334 |   - sqlalchemy=1.4.27=py38h7f8727e_0
335 |   - sqlite=3.37.2=hc218d9a_0
336 |   - statsmodels=0.12.2=py38h27cfd23_0
337 |   - sympy=1.9=py38h06a4308_0
338 |   - tbb=2021.5.0=hd09550d_0
339 |   - tbb4py=2021.5.0=py38hd09550d_0
340 |   - tblib=1.7.0=pyhd3eb1b0_0
341 |   - terminado=0.13.1=py38h06a4308_0
342 |   - testpath=0.5.0=pyhd3eb1b0_0
343 |   - text-unidecode=1.3=pyhd3eb1b0_0
344 |   - textdistance=4.2.1=pyhd3eb1b0_0
345 |   - threadpoolctl=2.2.0=pyh0d69192_0
346 |   - three-merge=0.1.1=pyhd3eb1b0_0
347 |   - tifffile=2021.7.2=pyhd3eb1b0_2
348 |   - tinycss=0.4=pyhd3eb1b0_1002
349 |   - tk=8.6.11=h1ccaba5_0
350 |   - toml=0.10.2=pyhd3eb1b0_0
351 |   - toolz=0.11.2=pyhd3eb1b0_0
352 |   - tornado=6.1=py38h27cfd23_0
353 |   - tqdm=4.62.3=pyhd3eb1b0_1
354 |   - traitlets=5.1.1=pyhd3eb1b0_0
355 |   - typed-ast=1.4.3=py38h7f8727e_1
356 |   - typing-extensions=3.10.0.2=hd3eb1b0_0
357 |   - typing_extensions=3.10.0.2=pyh06a4308_0
358 |   - ujson=4.2.0=py38h295c915_0
359 |   - unicodecsv=0.14.1=py38_0
360 |   - unidecode=1.2.0=pyhd3eb1b0_0
361 |   - unixodbc=2.3.9=h7b6447c_0
362 |   - urllib3=1.26.8=pyhd3eb1b0_0
363 |   - watchdog=2.1.6=py38h06a4308_0
364 |   - wcwidth=0.2.5=pyhd3eb1b0_0
365 |   - webencodings=0.5.1=py38_1
366 |   - websocket-client=0.58.0=py38h06a4308_4
367 |   - werkzeug=2.0.2=pyhd3eb1b0_0
368 |   - wheel=0.37.1=pyhd3eb1b0_0
369 |   - whichcraft=0.6.1=pyhd3eb1b0_0
370 |   - widgetsnbextension=3.5.2=py38h06a4308_0
371 |   - wrapt=1.12.1=py38h7b6447c_1
372 |   - wurlitzer=3.0.2=py38h06a4308_0
373 |   - xlrd=2.0.1=pyhd3eb1b0_0
374 |   - xlsxwriter=3.0.2=pyhd3eb1b0_0
375 |   - xlwt=1.3.0=py38_0
376 |   - xmltodict=0.12.0=pyhd3eb1b0_0
377 |   - xz=5.2.5=h7b6447c_0
378 |   - yaml=0.2.5=h7b6447c_0
379 |   - yapf=0.31.0=pyhd3eb1b0_0
380 |   - zeromq=4.3.4=h2531618_0
381 |   - zfp=0.5.5=h295c915_6
382 |   - zict=2.0.0=pyhd3eb1b0_0
383 |   - zipp=3.7.0=pyhd3eb1b0_0
384 |   - zlib=1.2.11=h7f8727e_4
385 |   - zope=1.0=py38_1
386 |   - zope.event=4.5.0=py38_0
387 |   - zope.interface=5.4.0=py38h7f8727e_0
388 |   - zstd=1.4.9=haebb681_0
389 |   - pip:
390 |     - absl-py==0.13.0
391 |     - cachetools==4.2.2
392 |     - certifi==2019.11.28
393 |     - chardet==3.0.4
394 |     - cryptography==2.8
395 |     - google-auth==1.32.0
396 |     - google-auth-oauthlib==0.4.4
397 |     - grpcio==1.38.1
398 |     - markdown==3.3.4
399 |     - mxnet==1.5.0
400 |     - netron==4.9.8
401 |     - oauthlib==3.1.1
402 |     - opencv-python==4.5.2.54
403 |     - protobuf==3.17.3
404 |     - pyasn1==0.4.8
405 |     - pyasn1-modules==0.2.8
406 |     - pyinstrument==3.4.2
407 |     - pyinstrument-cext==0.2.4
408 |     - python-graphviz==0.8.4
409 |     - requests-oauthlib==1.3.0
410 |     - rsa==4.7.2
411 |     - scrapy==2.0.0
412 |     - selenium==3.141.0
413 |     - service-identity==18.1.0
414 |     - tensorboard==2.5.0
415 |     - tensorboard-data-server==0.6.1
416 |     - tensorboard-plugin-wit==1.8.0
417 |     - thop==0.0.31-2005241907
418 |     - torch==1.5.1
419 |     - torchvision==0.6.1
420 |     - twisted==19.10.0
421 | prefix: /home/mpe/anaconda3/envs/imagescraper
422 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sun Jul 12 11:02:06 2020
 4 | 
 5 | @author: OHyic
 6 | 
 7 | """
 8 | #Import libraries
 9 | import os
10 | from GoogleImageScrapper import GoogleImageScraper
11 | from GettyImagesScrapper import GettyImageScraper
12 | from BingImageScrapper import BingImageScraper
13 | # from ShutterstockImagesScrapper import ShutterstockImageScraper
14 | from patch import webdriver_executable
15 | 
16 | if __name__ == "__main__":
17 |     #Define file path
18 |     webdriver_path = os.path.normpath(os.path.join(os.getcwd(), 'webdriver', webdriver_executable()))
19 |     image_path = os.path.normpath(os.path.join(os.getcwd(), 'photos'))
20 | 
21 |     #Website used for scraping: 
22 |     website_list = ['google', 'getty', 'shutterstock', 'bing']
23 |     search_site = website_list[0] #change index number here to select the website you are using
24 | 
25 |     #Add new search key into array ["cat","t-shirt","apple","orange","pear","fish"]
26 |     search_keys= ['chinese cargo boat']
27 | 
28 |     #Parameters
29 |     number_of_images = 1000
30 |     headless = False
31 |     min_resolution=(0,0)
32 |     max_resolution=(9999,9999)
33 | 
34 |     #Main program
35 |     #Choose if using Google, Getty or Shutterstock Images Scrapper
36 |     for search_key in search_keys:
37 |         if search_site == 'google':
38 |             image_scrapper = GoogleImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
39 |         if search_site == 'getty':
40 |             image_scrapper = GettyImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
41 |         # if search_site == 'shutterstock':
42 |             # image_scrapper = ShutterstockImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
43 |         if search_site == 'bing':
44 |             image_scrapper = BingImageScraper(webdriver_path,image_path,search_key,number_of_images,headless,min_resolution,max_resolution)
45 |         
46 |         image_urls = image_scrapper.find_image_urls()
47 |         image_scrapper.save_images(image_urls)
48 |     
49 |     #Release resources    
50 |     del image_scrapper


--------------------------------------------------------------------------------
/patch.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sun May 23 14:44:43 2021
 4 | 
 5 | @author: Yicong
 6 | """
 7 | #!/usr/bin/env python3
 8 | from selenium import webdriver
 9 | from selenium.webdriver.common.keys import Keys
10 | from selenium.common.exceptions import WebDriverException, SessionNotCreatedException
11 | import sys
12 | import os
13 | import urllib.request
14 | import re
15 | import zipfile
16 | import stat
17 | from sys import platform
18 | 
19 | def webdriver_executable():
20 |     if platform == "linux" or platform == "linux2" or platform == "darwin":
21 |         return 'chromedriver'
22 |     return 'chromedriver.exe'
23 | 
24 | def download_lastest_chromedriver(current_chrome_version=""):
25 |     def get_platform_filename():
26 |         filename = ''
27 |         is_64bits = sys.maxsize > 2**32
28 |     
29 |         if platform == "linux" or platform == "linux2":
30 |             # linux
31 |             filename += 'linux'
32 |             filename += '64' if is_64bits else '32'
33 |         elif platform == "darwin":
34 |             # OS X
35 |             filename += 'mac64'
36 |         elif platform == "win32":
37 |             # Windows...
38 |             filename += 'win32'
39 |     
40 |         filename += '.zip'
41 |     
42 |         return filename
43 |     
44 |     # Find the latest chromedriver, download, unzip, set permissions to executable.
45 |     
46 |     result = False
47 |     try:
48 |         url = 'https://chromedriver.chromium.org/downloads'
49 |         base_driver_url = 'https://chromedriver.storage.googleapis.com/'
50 |         file_name = 'chromedriver_' + get_platform_filename()
51 |         pattern = 'https://.*?path=(\d+\.\d+\.\d+\.\d+)'
52 |     
53 |         # Download latest chromedriver.
54 |         stream = urllib.request.urlopen(url)
55 |         content = stream.read().decode('utf8')
56 |     
57 |         # Parse the latest version.
58 |         all_match = re.findall(pattern, content)
59 |         
60 |         if all_match:
61 |             # Version of latest driver.
62 |             if(current_chrome_version!=""):
63 |                 print("[INFO] updating chromedriver")
64 |                 all_match = list(set(re.findall(pattern, content)))
65 |                 current_chrome_version = ".".join(current_chrome_version.split(".")[:-1])
66 |                 version_match = [i for i in all_match if re.search("^%s"%current_chrome_version,i)]
67 |                 version = version_match[0]
68 |             else:
69 |                 print("[INFO] installing new chromedriver")
70 |                 version = all_match[1]
71 |             driver_url = base_driver_url + version + '/' + file_name
72 |     
73 |             # Download the file.
74 |             print('[INFO] downloading chromedriver ver: %s: %s'% (version, driver_url))
75 |             app_path = os.path.dirname(os.path.realpath(__file__))
76 |             chromedriver_path = os.path.normpath(os.path.join(app_path, 'webdriver', webdriver_executable()))
77 |             file_path = os.path.normpath(os.path.join(app_path, 'webdriver', file_name))
78 |             urllib.request.urlretrieve(driver_url, file_path)
79 |     
80 |             # Unzip the file into folder
81 |             with zipfile.ZipFile(file_path, 'r') as zip_ref:
82 |                 zip_ref.extractall(os.path.normpath(os.path.join(app_path, 'webdriver')))
83 |             st = os.stat(chromedriver_path)
84 |             os.chmod(chromedriver_path, st.st_mode | stat.S_IEXEC)
85 |             print('[INFO] lastest chromedriver downloaded')
86 |             # Cleanup.
87 |             os.remove(file_path)
88 |             result = True
89 |     except Exception:
90 |         print("[WARN] unable to download lastest chromedriver. the system will use the local version instead.")
91 |     
92 |     return result
93 | 
94 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | absl-py==0.13.0
  2 | alabaster @ file:///home/ktietz/src/ci/alabaster_1611921544520/work
  3 | anaconda-client @ file:///tmp/build/80754af9/anaconda-client_1635330891925/work
  4 | anaconda-project @ file:///tmp/build/80754af9/anaconda-project_1637161053845/work
  5 | anyio @ file:///tmp/build/80754af9/anyio_1644481698350/work/dist
  6 | appdirs==1.4.4
  7 | argh==0.26.2
  8 | argon2-cffi @ file:///opt/conda/conda-bld/argon2-cffi_1645000214183/work
  9 | argon2-cffi-bindings @ file:///tmp/build/80754af9/argon2-cffi-bindings_1644569684262/work
 10 | arrow==0.13.1
 11 | asn1crypto @ file:///tmp/build/80754af9/asn1crypto_1596577642040/work
 12 | astroid @ file:///tmp/build/80754af9/astroid_1628063142195/work
 13 | astropy @ file:///tmp/build/80754af9/astropy_1638772087871/work
 14 | astunparse==1.6.3
 15 | async-generator==1.10
 16 | atomicwrites==1.4.0
 17 | attrs==19.3.0
 18 | Automat==20.2.0
 19 | autopep8 @ file:///opt/conda/conda-bld/autopep8_1639166893812/work
 20 | Babel @ file:///tmp/build/80754af9/babel_1620871417480/work
 21 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
 22 | backports.functools-lru-cache @ file:///tmp/build/80754af9/backports.functools_lru_cache_1618170165463/work
 23 | backports.shutil-get-terminal-size @ file:///tmp/build/80754af9/backports.shutil_get_terminal_size_1608222128777/work
 24 | backports.tempfile @ file:///home/linux1/recipes/ci/backports.tempfile_1610991236607/work
 25 | backports.weakref==1.0.post1
 26 | beautifulsoup4==4.8.2
 27 | binaryornot @ file:///tmp/build/80754af9/binaryornot_1617751525010/work
 28 | bitarray @ file:///tmp/build/80754af9/bitarray_1641817257091/work
 29 | bkcharts==0.2
 30 | black==19.10b0
 31 | bleach @ file:///opt/conda/conda-bld/bleach_1641577558959/work
 32 | bokeh @ file:///tmp/build/80754af9/bokeh_1638349634419/work
 33 | boto==2.49.0
 34 | Bottleneck==1.3.2
 35 | brotlipy==0.7.0
 36 | bs4==0.0.1
 37 | cachetools==4.2.2
 38 | certifi==2019.11.28
 39 | cffi==1.14.0
 40 | chardet==3.0.4
 41 | charset-normalizer @ file:///tmp/build/80754af9/charset-normalizer_1630003229654/work
 42 | clang==5.0
 43 | click==8.0.3
 44 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1632508026186/work
 45 | clyent==1.2.2
 46 | colorama @ file:///tmp/build/80754af9/colorama_1607707115595/work
 47 | conda-content-trust @ file:///tmp/build/80754af9/conda-content-trust_1617045594566/work
 48 | conda-pack @ file:///tmp/build/80754af9/conda-pack_1611163042455/work
 49 | conda-package-handling @ file:///tmp/build/80754af9/conda-package-handling_1618262148928/work
 50 | conda-repo-cli @ file:///tmp/build/80754af9/conda-repo-cli_1620168426516/work
 51 | conda-verify==3.4.2
 52 | constantly==15.1.0
 53 | contextlib2 @ file:///Users/ktietz/demo/mc3/conda-bld/contextlib2_1630668244042/work
 54 | cookiecutter @ file:///tmp/build/80754af9/cookiecutter_1617748928239/work
 55 | cryptography==2.8
 56 | cssselect==1.1.0
 57 | cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work
 58 | Cython @ file:///tmp/build/80754af9/cython_1639474574311/work
 59 | cytoolz==0.11.0
 60 | daal4py==2021.5.0
 61 | dask==2021.10.0
 62 | debugpy @ file:///tmp/build/80754af9/debugpy_1637091796427/work
 63 | decorator @ file:///opt/conda/conda-bld/decorator_1643638310831/work
 64 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work
 65 | diff-match-patch @ file:///Users/ktietz/demo/mc3/conda-bld/diff-match-patch_1630511840874/work
 66 | distributed @ file:///tmp/build/80754af9/distributed_1635968203122/work
 67 | docutils @ file:///tmp/build/80754af9/docutils_1620827984873/work
 68 | entrypoints==0.3
 69 | et-xmlfile==1.1.0
 70 | fastcache==1.1.0
 71 | filelock @ file:///opt/conda/conda-bld/filelock_1642510437405/work
 72 | flake8 @ file:///tmp/build/80754af9/flake8_1620776156532/work
 73 | Flask @ file:///home/ktietz/src/ci/flask_1611932660458/work
 74 | flatbuffers==1.12
 75 | fonttools==4.25.0
 76 | fsspec @ file:///opt/conda/conda-bld/fsspec_1642510437511/work
 77 | future==0.18.2
 78 | gast==0.4.0
 79 | gevent @ file:///tmp/build/80754af9/gevent_1628273677693/work
 80 | glob2 @ file:///home/linux1/recipes/ci/glob2_1610991677669/work
 81 | gmpy2 @ file:///tmp/build/80754af9/gmpy2_1645455532332/work
 82 | google-auth==1.35.0
 83 | google-auth-oauthlib==0.4.5
 84 | google-pasta==0.2.0
 85 | graphviz==0.8.4
 86 | greenlet @ file:///tmp/build/80754af9/greenlet_1628887725296/work
 87 | grpcio==1.39.0
 88 | h11==0.13.0
 89 | h5py==3.1.0
 90 | HeapDict @ file:///Users/ktietz/demo/mc3/conda-bld/heapdict_1630598515714/work
 91 | html5lib @ file:///Users/ktietz/demo/mc3/conda-bld/html5lib_1629144453894/work
 92 | hyperlink==19.0.0
 93 | idna==2.9
 94 | imagecodecs @ file:///tmp/build/80754af9/imagecodecs_1635529103369/work
 95 | imageio @ file:///tmp/build/80754af9/imageio_1617700267927/work
 96 | imagesize @ file:///tmp/build/80754af9/imagesize_1637939814114/work
 97 | importlib-metadata @ file:///tmp/build/80754af9/importlib-metadata_1638542885373/work
 98 | incremental==17.5.0
 99 | inflection==0.5.1
100 | iniconfig @ file:///home/linux1/recipes/ci/iniconfig_1610983019677/work
101 | intervaltree @ file:///Users/ktietz/demo/mc3/conda-bld/intervaltree_1630511889664/work
102 | ipykernel @ file:///tmp/build/80754af9/ipykernel_1633545412716/work/dist/ipykernel-6.4.1-py3-none-any.whl
103 | ipython @ file:///tmp/build/80754af9/ipython_1643818147236/work
104 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work
105 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1634143127070/work
106 | isort @ file:///tmp/build/80754af9/isort_1628603791788/work
107 | itsdangerous @ file:///tmp/build/80754af9/itsdangerous_1621432558163/work
108 | jdcal @ file:///Users/ktietz/demo/mc3/conda-bld/jdcal_1630584345063/work
109 | jedi @ file:///tmp/build/80754af9/jedi_1644315233700/work
110 | jeepney @ file:///tmp/build/80754af9/jeepney_1627537048313/work
111 | Jinja2 @ file:///tmp/build/80754af9/jinja2_1612213139570/work
112 | jinja2-time @ file:///tmp/build/80754af9/jinja2-time_1617751524098/work
113 | joblib @ file:///tmp/build/80754af9/joblib_1635411271373/work
114 | json5 @ file:///tmp/build/80754af9/json5_1624432770122/work
115 | jsonschema @ file:///Users/ktietz/demo/mc3/conda-bld/jsonschema_1630511932244/work
116 | jupyter==1.0.0
117 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1616770841739/work
118 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1616615302928/work
119 | jupyter-core @ file:///tmp/build/80754af9/jupyter_core_1636524756443/work
120 | jupyter-server @ file:///opt/conda/conda-bld/jupyter_server_1644494914632/work
121 | jupyterlab @ file:///opt/conda/conda-bld/jupyterlab_1644830542042/work
122 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
123 | jupyterlab-server @ file:///opt/conda/conda-bld/jupyterlab_server_1644500396812/work
124 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work
125 | keras==2.6.0
126 | Keras-Preprocessing==1.1.2
127 | keyring @ file:///tmp/build/80754af9/keyring_1638531356231/work
128 | kiwisolver @ file:///opt/conda/conda-bld/kiwisolver_1638569886207/work
129 | lazy-object-proxy @ file:///tmp/build/80754af9/lazy-object-proxy_1616526917483/work
130 | libarchive-c @ file:///tmp/build/80754af9/python-libarchive-c_1617780486945/work
131 | llvmlite==0.37.0
132 | locket==0.2.1
133 | lxml==4.5.0
134 | Markdown==3.3.4
135 | MarkupSafe==1.1.1
136 | matplotlib @ file:///tmp/build/80754af9/matplotlib-suite_1645455682260/work
137 | matplotlib-inline @ file:///tmp/build/80754af9/matplotlib-inline_1628242447089/work
138 | mccabe==0.6.1
139 | mistune==0.8.4
140 | mkl-fft==1.3.1
141 | mkl-random @ file:///tmp/build/80754af9/mkl_random_1626186064646/work
142 | mkl-service==2.4.0
143 | mock @ file:///tmp/build/80754af9/mock_1607622725907/work
144 | more-itertools @ file:///tmp/build/80754af9/more-itertools_1637733554872/work
145 | mpmath==1.2.1
146 | msgpack @ file:///tmp/build/80754af9/msgpack-python_1612287151062/work
147 | multipledispatch==0.6.0
148 | munkres==1.1.4
149 | mxnet==1.5.0
150 | mypy-extensions==0.4.3
151 | navigator-updater==0.2.1
152 | nb-conda==2.2.1
153 | nb-conda-kernels @ file:///tmp/build/80754af9/nb_conda_kernels_1606775941989/work
154 | nbclassic @ file:///opt/conda/conda-bld/nbclassic_1644943264176/work
155 | nbclient @ file:///tmp/build/80754af9/nbclient_1645431659072/work
156 | nbconvert @ file:///opt/conda/conda-bld/nbconvert_1641309195684/work
157 | nbformat @ file:///tmp/build/80754af9/nbformat_1617383369282/work
158 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1613680548246/work
159 | netron==4.9.8
160 | networkx @ file:///tmp/build/80754af9/networkx_1633639043937/work
161 | nltk==3.6.5
162 | nose @ file:///opt/conda/conda-bld/nose_1642704612149/work
163 | notebook @ file:///tmp/build/80754af9/notebook_1645002536250/work
164 | numba @ file:///tmp/build/80754af9/numba_1635185927556/work
165 | numexpr @ file:///tmp/build/80754af9/numexpr_1640704208950/work
166 | numpy==1.22.3
167 | numpydoc @ file:///opt/conda/conda-bld/numpydoc_1643788541039/work
168 | oauthlib==3.1.1
169 | olefile @ file:///Users/ktietz/demo/mc3/conda-bld/olefile_1629805411829/work
170 | opencv-python==4.5.2.54
171 | openpyxl @ file:///tmp/build/80754af9/openpyxl_1632777717936/work
172 | opt-einsum==3.3.0
173 | outcome==1.1.0
174 | packaging @ file:///tmp/build/80754af9/packaging_1637314298585/work
175 | pandas==1.4.1
176 | pandocfilters @ file:///opt/conda/conda-bld/pandocfilters_1643405455980/work
177 | parsel==1.5.2
178 | parso @ file:///opt/conda/conda-bld/parso_1641458642106/work
179 | partd @ file:///tmp/build/80754af9/partd_1618000087440/work
180 | path @ file:///opt/conda/conda-bld/path_1641578212155/work
181 | pathlib2 @ file:///tmp/build/80754af9/pathlib2_1625585678054/work
182 | pathspec==0.7.0
183 | patsy==0.5.2
184 | pep8==1.7.1
185 | pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
186 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
187 | Pillow==8.4.0
188 | pkginfo @ file:///tmp/build/80754af9/pkginfo_1643162084911/work
189 | pluggy @ file:///tmp/build/80754af9/pluggy_1633715052817/work
190 | ply==3.11
191 | poyo @ file:///tmp/build/80754af9/poyo_1617751526755/work
192 | prometheus-client @ file:///opt/conda/conda-bld/prometheus_client_1643788673601/work
193 | prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1633440160888/work
194 | Protego==0.1.16
195 | protobuf==3.17.3
196 | psutil @ file:///tmp/build/80754af9/psutil_1612298023621/work
197 | ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
198 | py @ file:///opt/conda/conda-bld/py_1644396412707/work
199 | pyasn1==0.4.8
200 | pyasn1-modules==0.2.8
201 | pycodestyle @ file:///tmp/build/80754af9/pycodestyle_1615748559966/work
202 | pycosat==0.6.3
203 | pycparser==2.20
204 | pycurl==7.44.1
205 | PyDispatcher==2.0.5
206 | pydocstyle @ file:///tmp/build/80754af9/pydocstyle_1621600989141/work
207 | pyerfa @ file:///tmp/build/80754af9/pyerfa_1621560806183/work
208 | pyflakes @ file:///tmp/build/80754af9/pyflakes_1617200973297/work
209 | Pygments @ file:///opt/conda/conda-bld/pygments_1644249106324/work
210 | PyHamcrest==2.0.2
211 | pyinstrument==3.4.2
212 | pyinstrument-cext==0.2.4
213 | pylint @ file:///tmp/build/80754af9/pylint_1627536788098/work
214 | pyls-spyder==0.4.0
215 | pyodbc===4.0.0-unsupported
216 | pyOpenSSL==19.1.0
217 | pyparsing @ file:///tmp/build/80754af9/pyparsing_1635766073266/work
218 | pyrsistent @ file:///tmp/build/80754af9/pyrsistent_1636110947380/work
219 | PySocks==1.7.1
220 | pytest==6.2.5
221 | python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work
222 | python-lsp-black @ file:///tmp/build/80754af9/python-lsp-black_1634232156041/work
223 | python-lsp-jsonrpc==1.0.0
224 | python-lsp-server==1.2.4
225 | python-slugify @ file:///tmp/build/80754af9/python-slugify_1620405669636/work
226 | pytz==2021.3
227 | PyWavelets @ file:///tmp/build/80754af9/pywavelets_1601658317819/work
228 | pyxdg @ file:///tmp/build/80754af9/pyxdg_1603822279816/work
229 | PyYAML==6.0
230 | pyzmq @ file:///tmp/build/80754af9/pyzmq_1638436375034/work
231 | QDarkStyle @ file:///tmp/build/80754af9/qdarkstyle_1617386714626/work
232 | qstylizer @ file:///tmp/build/80754af9/qstylizer_1617713584600/work/dist/qstylizer-0.1.10-py2.py3-none-any.whl
233 | QtAwesome @ file:///tmp/build/80754af9/qtawesome_1637160816833/work
234 | qtconsole @ file:///opt/conda/conda-bld/qtconsole_1643819126524/work
235 | QtPy @ file:///opt/conda/conda-bld/qtpy_1643087291789/work
236 | queuelib==1.5.0
237 | regex @ file:///opt/conda/conda-bld/regex_1642021319040/work
238 | requests==2.24.0
239 | requests-oauthlib==1.3.0
240 | rope @ file:///opt/conda/conda-bld/rope_1643788605236/work
241 | rsa==4.7.2
242 | Rtree @ file:///tmp/build/80754af9/rtree_1618420845272/work
243 | ruamel-yaml-conda @ file:///tmp/build/80754af9/ruamel_yaml_1616016699510/work
244 | scikit-image==0.18.3
245 | scikit-learn @ file:///tmp/build/80754af9/scikit-learn_1642617107864/work
246 | scikit-learn-intelex==2021.20220215.212714
247 | scipy @ file:///tmp/build/80754af9/scipy_1641555001653/work
248 | Scrapy==2.0.0
249 | seaborn @ file:///tmp/build/80754af9/seaborn_1629307859561/work
250 | SecretStorage @ file:///tmp/build/80754af9/secretstorage_1614022784285/work
251 | selenium==3.141.0
252 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1632406701022/work
253 | service-identity==18.1.0
254 | simplegeneric==0.8.1
255 | singledispatch @ file:///tmp/build/80754af9/singledispatch_1629321204894/work
256 | sip==4.19.13
257 | six==1.14.0
258 | sniffio==1.2.0
259 | snowballstemmer @ file:///tmp/build/80754af9/snowballstemmer_1637937080595/work
260 | sortedcollections @ file:///tmp/build/80754af9/sortedcollections_1611172717284/work
261 | sortedcontainers==2.4.0
262 | soupsieve==2.0
263 | Sphinx @ file:///opt/conda/conda-bld/sphinx_1643644169832/work
264 | sphinxcontrib-applehelp @ file:///home/ktietz/src/ci/sphinxcontrib-applehelp_1611920841464/work
265 | sphinxcontrib-devhelp @ file:///home/ktietz/src/ci/sphinxcontrib-devhelp_1611920923094/work
266 | sphinxcontrib-htmlhelp @ file:///tmp/build/80754af9/sphinxcontrib-htmlhelp_1623945626792/work
267 | sphinxcontrib-jsmath @ file:///home/ktietz/src/ci/sphinxcontrib-jsmath_1611920942228/work
268 | sphinxcontrib-qthelp @ file:///home/ktietz/src/ci/sphinxcontrib-qthelp_1611921055322/work
269 | sphinxcontrib-serializinghtml @ file:///tmp/build/80754af9/sphinxcontrib-serializinghtml_1624451540180/work
270 | sphinxcontrib-websupport @ file:///tmp/build/80754af9/sphinxcontrib-websupport_1597081412696/work
271 | spyder @ file:///tmp/build/80754af9/spyder_1636480225430/work
272 | spyder-kernels @ file:///tmp/build/80754af9/spyder-kernels_1634236926649/work
273 | SQLAlchemy @ file:///tmp/build/80754af9/sqlalchemy_1638290671404/work
274 | statsmodels @ file:///tmp/build/80754af9/statsmodels_1614023746358/work
275 | sympy @ file:///tmp/build/80754af9/sympy_1635237063176/work
276 | tables==3.6.1
277 | TBB==0.2
278 | tblib @ file:///Users/ktietz/demo/mc3/conda-bld/tblib_1629402031467/work
279 | tensorboard==2.6.0
280 | tensorboard-data-server==0.6.1
281 | tensorboard-plugin-wit==1.8.0
282 | tensorflow-estimator==2.6.0
283 | tensorflow-gpu==2.6.0
284 | termcolor==1.1.0
285 | terminado @ file:///tmp/build/80754af9/terminado_1644322581811/work
286 | testpath @ file:///tmp/build/80754af9/testpath_1624638946665/work
287 | text-unidecode @ file:///Users/ktietz/demo/mc3/conda-bld/text-unidecode_1629401354553/work
288 | textdistance @ file:///tmp/build/80754af9/textdistance_1612461398012/work
289 | thop==0.0.31.post2005241907
290 | threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work
291 | three-merge @ file:///tmp/build/80754af9/three-merge_1607553261110/work
292 | tifffile @ file:///tmp/build/80754af9/tifffile_1627275862826/work
293 | tinycss @ file:///tmp/build/80754af9/tinycss_1617713798712/work
294 | toml @ file:///tmp/build/80754af9/toml_1616166611790/work
295 | toolz @ file:///tmp/build/80754af9/toolz_1636545406491/work
296 | torch==1.5.1
297 | torchaudio==0.8.1
298 | torchvision==0.6.1
299 | tornado @ file:///tmp/build/80754af9/tornado_1606942300299/work
300 | tqdm @ file:///tmp/build/80754af9/tqdm_1635330843403/work
301 | traitlets @ file:///tmp/build/80754af9/traitlets_1636710298902/work
302 | trio==0.20.0
303 | trio-websocket==0.9.2
304 | Twisted==19.10.0
305 | typed-ast @ file:///tmp/build/80754af9/typed-ast_1624953673417/work
306 | typing-extensions==3.7.4.3
307 | ujson @ file:///opt/conda/conda-bld/ujson_1640703856928/work
308 | unicodecsv==0.14.1
309 | Unidecode @ file:///tmp/build/80754af9/unidecode_1614712377438/work
310 | urllib3==1.25.8
311 | w3lib==1.21.0
312 | watchdog @ file:///tmp/build/80754af9/watchdog_1638366565112/work
313 | wcwidth @ file:///Users/ktietz/demo/mc3/conda-bld/wcwidth_1629357192024/work
314 | webencodings==0.5.1
315 | websocket-client @ file:///tmp/build/80754af9/websocket-client_1614804261064/work
316 | Werkzeug==2.0.1
317 | whichcraft @ file:///tmp/build/80754af9/whichcraft_1617751293875/work
318 | widgetsnbextension @ file:///tmp/build/80754af9/widgetsnbextension_1645009353553/work
319 | wrapt==1.12.1
320 | wsproto==1.1.0
321 | wurlitzer @ file:///tmp/build/80754af9/wurlitzer_1638354972036/work
322 | xlrd @ file:///tmp/build/80754af9/xlrd_1608072521494/work
323 | XlsxWriter @ file:///tmp/build/80754af9/xlsxwriter_1636633762820/work
324 | xlwt==1.3.0
325 | xmltodict @ file:///Users/ktietz/demo/mc3/conda-bld/xmltodict_1629301980723/work
326 | yapf @ file:///tmp/build/80754af9/yapf_1615749224965/work
327 | zict==2.0.0
328 | zipp @ file:///opt/conda/conda-bld/zipp_1641824620731/work
329 | zope.event==4.5.0
330 | zope.interface==4.7.2
331 | 


--------------------------------------------------------------------------------
/webdriver/chromedriver:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JJLimmm/Website-Image-Scraper/0fddf9b3651875b81374057466e388c0ad6e0ef5/webdriver/chromedriver


--------------------------------------------------------------------------------
/webdriver/chromedriver.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JJLimmm/Website-Image-Scraper/0fddf9b3651875b81374057466e388c0ad6e0ef5/webdriver/chromedriver.exe


--------------------------------------------------------------------------------
/youtube_thumbnail.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JJLimmm/Website-Image-Scraper/0fddf9b3651875b81374057466e388c0ad6e0ef5/youtube_thumbnail.PNG


--------------------------------------------------------------------------------