├── .gitignore ├── README.md ├── client.py ├── images └── safe-search.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Project exclude paths 2 | /venv/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![safe search](./images/safe-search.png) 2 | 3 | You know how you can filter out all the good stuff with google image search 4 | 5 | But what if you **only** want the good stuff? 6 | 7 | # Usage 8 | 9 | `pip install -r requirements.txt` 10 | 11 | 12 | - Register Google Search API utilities on [DataForSeo.com](dataforseo.com) (not affiliated, I hate it) 13 | 14 | - Set `DFS_USERNAME` `DFS_PASSWORD` environment variables 15 | 16 | - Sample Code: 17 | 18 | ```python 19 | >>> unsafe_searcher = UnsafeSearcher() 20 | >>> unsafe_searcher.get_funky(keyword="belle delphine") 21 | 22 | task posted 23 | waiting for tasks to finish... 24 | polling... 25 | Successfully harvested 354 funky images 26 | Downloading... 
"""Client for finding images that Google SafeSearch filters out.

Posts two DataForSEO image-search tasks for the same keyword -- one with
SafeSearch on, one with it off -- then downloads every image URL that only
the unfiltered search returned.
"""

import mimetypes
import os
import shutil
import sys
import time
from base64 import b64encode
from http.client import HTTPSConnection
from json import dumps, loads
from pathlib import Path

import requests
from tqdm import tqdm


class RestClient:
    """Minimal DataForSEO REST client using HTTP basic auth."""

    domain = "api.dataforseo.com"

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def request(self, path, method, data=None):
        """Send one HTTPS request and return the decoded JSON response.

        The connection is always closed, even when the request raises.
        """
        connection = HTTPSConnection(self.domain)
        try:
            credentials = b64encode(
                f"{self.username}:{self.password}".encode("ascii")
            ).decode("ascii")
            headers = {
                "Authorization": f"Basic {credentials}",
                "Content-Encoding": "gzip",
            }
            connection.request(method, path, headers=headers, body=data)
            response = connection.getresponse()
            return loads(response.read().decode())
        finally:
            connection.close()

    def get(self, path):
        return self.request(path, "GET")

    def post(self, path, data):
        # Accept either a pre-serialized JSON string or a plain mapping.
        data_str = data if isinstance(data, str) else dumps(data)
        return self.request(path, "POST", data_str)


client = RestClient(os.environ.get("DFS_USERNAME"), os.environ.get("DFS_PASSWORD"))


class UnsafeSearcher:
    """Harvests image URLs present in unsafe search but absent from safe search."""

    POLL_INTERVAL = 5    # seconds between task-status polls
    POLL_TIMEOUT = 300   # give up polling after 5 minutes

    def __init__(self):
        self.safe_id = None       # DataForSEO task id for the SafeSearch-on query
        self.unsafe_id = None     # DataForSEO task id for the SafeSearch-off query
        self.safe_urls = set()
        self.unsafe_urls = set()
        self.funky_urls = set()   # unsafe - safe: images only the unfiltered search found

    @staticmethod
    def _extract_urls(response):
        """Return the set of source URLs from a task_get response.

        Raises KeyError/IndexError/TypeError while the task has not finished
        (the "result" field is empty or null until then).
        """
        return {
            item["source_url"]
            for item in response["tasks"][0]["result"][0]["items"]
        }

    def get_funky(self, keyword):
        """Post safe+unsafe search tasks for *keyword*, poll until both finish,
        then download every unsafe-only image into a directory named *keyword*.

        Exits the process with status 1 if the tasks do not finish within
        POLL_TIMEOUT seconds.
        """
        # Relative path, consistent with the task_get calls below.
        posted = client.post(
            "/v3/serp/google/images/task_post",
            {
                0: dict(language_code="en", location_code=2840, keyword=keyword,
                        depth=700, search_param="&safe=active"),
                1: dict(language_code="en", location_code=2840, keyword=keyword,
                        depth=300, search_param="&safe=off"),
            },
        )
        self.safe_id = posted["tasks"][0]["id"]
        self.unsafe_id = posted["tasks"][1]["id"]
        print("task posted")

        print("waiting for tasks to finish (timeout = 5 minutes)...")
        time.sleep(self.POLL_INTERVAL)

        time_elapsed = 0
        while True:
            try:
                safe_response = client.get(
                    f"/v3/serp/google/images/task_get/advanced/{self.safe_id}")
                unsafe_response = client.get(
                    f"/v3/serp/google/images/task_get/advanced/{self.unsafe_id}")
                self.safe_urls = self._extract_urls(safe_response)
                self.unsafe_urls = self._extract_urls(unsafe_response)
            except (KeyError, IndexError, TypeError):
                # Results are not populated yet -- keep polling until timeout.
                print("polling...")
                time.sleep(self.POLL_INTERVAL)
                time_elapsed += self.POLL_INTERVAL
                if time_elapsed >= self.POLL_TIMEOUT:
                    print("Timed Out")
                    sys.exit(1)
            else:
                break

        self.funky_urls = self.unsafe_urls - self.safe_urls
        print(f"Successfully harvested {len(self.funky_urls)} funky images")

        os.makedirs(keyword, exist_ok=True)
        print("Downloading...")
        for i, url in tqdm(list(enumerate(self.funky_urls))):
            try:
                # timeout so a dead host cannot hang the whole run;
                # context managers guarantee the response and file are closed.
                with requests.get(url, stream=True, timeout=30) as resp:
                    content_type = resp.headers["content-type"]
                    # guess_extension may return None for unknown types.
                    extension = mimetypes.guess_extension(content_type) or ""
                    resp.raw.decode_content = True
                    with open(Path(keyword) / f"{i}{extension}", "wb") as local_file:
                        shutil.copyfileobj(resp.raw, local_file)
            except Exception:
                # Best-effort download: skip anything that fails and move on.
                print(f"Failed to download: {url}")


us = UnsafeSearcher()
https://raw.githubusercontent.com/Madoshakalaka/unsafe-search/b3c5a3bd19868c7da65fdab8ef08da12909af830/images/safe-search.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | tqdm --------------------------------------------------------------------------------