├── .gitattributes ├── README.md ├── getProxy.py ├── main.py ├── proxies.txt └── requirements.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pimeyes.com Scraper 2 |
3 | 4 | 5 |
6 | 7 |
8 | 9 | 10 | ## Overview 11 | 12 | This tool is a Selenium-based automation script that uploads a face image to [pimeyes.com](https://pimeyes.com) for reverse face searching. It uploads the image, initiates the search, and then retrieves and outputs the URL of the search results page along with the number of results found. 13 | 14 | ## Requirements 15 | 16 | Before you begin, ensure you have met the following requirements: 17 | - Python 3.11 or higher installed on your machine. 18 | - The latest version of Google Chrome installed. 19 | 20 | ## Installation & Use 21 | - Clone this repo: ```git clone https://github.com/Nix4444/Pimeyes-scraper``` 22 | - Install the requirements: ```pip install -r requirements.txt``` 23 | - Add the image to be searched to your working directory. 24 | - Start the bot: ```python main.py``` 25 | - Enter the path to the image when prompted; the results URL and the number of results will be printed. 26 | - You are limited to 10 searches per IP address, so use a proxy or a VPN if you need more. 
"""Pimeyes.com scraper: proxy helpers (getProxy.py) and upload flow (main.py).

Uploads a face image to pimeyes.com with Selenium, starts the reverse face
search, and prints the URL of the results page together with the result
count shown on that page.

Requirements (requirements.txt): ``selenium`` and ``selenium-wire``.
selenium-wire is only imported when a proxy is used.

Proxies: set ``use_proxy`` to ``True`` and add proxies to ``proxies.txt`` in
the working directory, one per line, in the format ``USERNAME:PASS@IP:PORT``.
Depending on the proxy type, change the ``fetchsocks5()`` call inside
``upload`` to ``fetchhttps()`` or ``fetchhttp()``.
"""
import os
import random
import time

use_proxy = False  # Set to True to use proxy, False to use your host IP

url = "https://pimeyes.com/en"

# File the proxy helpers read from (relative to the working directory).
PROXY_FILE = 'proxies.txt'

# Seconds to pause after submitting before reading the current URL.
RESULTS_PAGE_DELAY = 5

# Locators used by upload(). The first and last are XPath expressions, the
# others are CSS selectors.
_UPLOAD_BUTTON_XPATH = '//*[@id="hero-section"]/div/div[1]/div/div/div[1]/button[2]'
_FILE_INPUT_SELECTOR = 'input[type=file]'
# Common ancestor of the three agreement checkboxes and the submit button.
_FORM_ROOT = (
    '#app > div.wrapper.mobile-fullscreen-mode.mobile-full-height'
    ' > div > div > div > div > div > div'
)
_AGREEMENT_SELECTORS = tuple(
    f'{_FORM_ROOT} > div.permissions > div:nth-child({i}) > label > input[type=checkbox]'
    for i in (1, 2, 3)
)
_SUBMIT_SELECTOR = f'{_FORM_ROOT} > button'
_RESULTS_XPATH = '//*[@id="results"]/div/div/div[3]/div/div/div[1]/div/div[1]/button/div/span/span'


def _pick_proxy(scheme):
    """Return ``scheme://<proxy>`` for a random non-blank line of PROXY_FILE.

    Raises:
        FileNotFoundError: if PROXY_FILE does not exist.
        ValueError: if PROXY_FILE contains no non-blank lines.
    """
    with open(PROXY_FILE) as f:
        # Skip blank lines so a stray newline can never yield "scheme://".
        candidates = [line.strip() for line in f if line.strip()]
    if not candidates:
        raise ValueError(f"{PROXY_FILE} contains no proxies")
    return f"{scheme}://{random.choice(candidates)}"


def fetchsocks5():
    """Return a random proxy from proxies.txt as a ``socks5://`` URL."""
    return _pick_proxy('socks5')


def fetchhttps():
    """Return a random proxy from proxies.txt as an ``https://`` URL."""
    return _pick_proxy('https')


def fetchhttp():
    """Return a random proxy from proxies.txt as an ``http://`` URL."""
    return _pick_proxy('http')


def upload(url, path, use_proxy):
    """Upload the image at *path* to *url* and print the search outcome.

    Args:
        url: Address of the page that hosts the upload form.
        path: Path to the image file; may be relative to the working
            directory or start with ``~``.
        use_proxy: When true, drive Chrome through selenium-wire with a
            proxy taken from proxies.txt; otherwise use plain Selenium.

    Returns:
        A ``(results, currenturl)`` tuple. Either element is ``None`` when
        it could not be obtained. The same two values are printed.
    """
    # File inputs need an absolute path; users usually type a bare file name.
    image_path = os.path.abspath(os.path.expanduser(path))
    if not os.path.isfile(image_path):
        print(f"Image file not found: {image_path}")
        return None, None

    # Imported here, keyed on the *parameter*, so the driver flavour always
    # matches the branch taken below (a module-level import keyed on the
    # global flag could disagree with the argument).
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    if use_proxy:
        from seleniumwire import webdriver
    else:
        from selenium import webdriver

    chrome_options = Options()
    # chrome_options.add_argument('--headless')  # Uncomment to run Chrome in headless mode (no GUI)

    driver = None
    results = None
    currenturl = None

    try:
        if use_proxy:
            prox = fetchsocks5()  # FORMAT = USERNAME:PASS@IP:PORT
            driver = webdriver.Chrome(
                options=chrome_options,
                seleniumwire_options={
                    'proxy': {
                        'http': prox,
                        'https': prox,
                        'no_proxy': 'localhost,127.0.0.1',
                    },
                },
            )
        else:
            driver = webdriver.Chrome(options=chrome_options)

        driver.get(url)
        WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, _UPLOAD_BUTTON_XPATH))
        ).click()

        file_input = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, _FILE_INPUT_SELECTOR))
        )
        file_input.send_keys(image_path)

        # Tick the three agreement checkboxes, then press submit.
        for selector in (*_AGREEMENT_SELECTORS, _SUBMIT_SELECTOR):
            WebDriverWait(driver, 15).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
            ).click()

        time.sleep(RESULTS_PAGE_DELAY)
        currenturl = driver.current_url
        results = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, _RESULTS_XPATH))
        ).text

    except Exception as e:
        # Top-level boundary of the script: report and fall through to cleanup.
        print(f"An exception occurred: {e}")

    finally:
        print("Results: ", results)
        print("URL: ", currenturl)
        if driver:
            driver.quit()

    return results, currenturl


def main():
    """Prompt for an image path and run the upload with the module settings."""
    path = input("Enter path to the image: ").strip()
    upload(url, path, use_proxy)


if __name__ == "__main__":
    main()