├── src └── godork │ ├── __init__.py │ ├── utils │ ├── __init__.py │ ├── colors.py │ ├── exceptions.py │ ├── parse.py │ ├── banner.py │ └── user_agents.py │ ├── helpers │ ├── __init__.py │ ├── options.py │ ├── extractor.py │ ├── reports.py │ └── console.py │ ├── services │ ├── __init__.py │ ├── driver.py │ ├── version.py │ ├── requester.py │ ├── recaptcha.py │ └── scrape.py │ └── godork.py ├── MANIFEST.in ├── requirements.txt ├── .dockerignore ├── LICENSE ├── setup.py ├── Dockerfile ├── .gitignore └── README.md /src/godork/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/godork/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/godork/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/godork/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bs4 2 | pydub 3 | psutil 4 | aiohttp 5 | asyncio 6 | selenium 7 | setuptools 8 | SpeechRecognition 9 | webdriver-manager 10 | undetected-chromedriver 11 | -------------------------------------------------------------------------------- /src/godork/utils/colors.py: -------------------------------------------------------------------------------- 1 | class Bgcolor: 2 | 
3 | # Just a bunch of colors in one place. 4 | 5 | DEFAULT = '\033[0m' 6 | WARNING = '\033[33m' 7 | PURPLE = '\033[35m' 8 | GREEN = '\033[32m' 9 | BOLD = '\033[1m' 10 | GRAY = '\033[2m' 11 | BLUE = '\033[34m' 12 | CYAN = '\033[36m' 13 | RED = '\033[31m' -------------------------------------------------------------------------------- /src/godork/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | class GodorkException(Exception): 2 | # This exception is used for general errors 3 | pass 4 | 5 | class GodorkTimeout(TimeoutError): 6 | # This exception is used for timeout errors 7 | pass 8 | 9 | class GodorkMaxRetries(Exception): 10 | # This exception is used for max retries errors 11 | pass 12 | 13 | class GodorkNoData(Exception): 14 | # This exception is used for no data errors 15 | pass -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Version control 2 | .git 3 | .github 4 | .gitignore 5 | 6 | # Environment and cache 7 | .venv 8 | .env 9 | .env.local 10 | __pycache__ 11 | *.pyc 12 | *.pyo 13 | *.pyd 14 | .Python 15 | .pytest_cache 16 | .pdm-build 17 | 18 | # Distribution / packaging 19 | dist 20 | build 21 | *.egg-info 22 | 23 | # Development 24 | .vscode 25 | .idea 26 | *.swp 27 | *.swo 28 | 29 | # Docs 30 | docs/site 31 | # Notebooks 32 | notebooks/.ipynb_checkpoints 33 | 34 | # Docker 35 | Dockerfile 36 | .dockerignore 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2025 Thunder 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/godork/godork.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 | 5 | from .utils.colors import Bgcolor 6 | from .helpers.console import Console 7 | from .helpers.options import OptionParser 8 | from .services.version import check_version 9 | from .services.scrape import Scraper 10 | 11 | def main(): 12 | check_version() 13 | 14 | args = OptionParser.argument_parser() 15 | 16 | if len(args.dorks) < 1: 17 | print(f"""{Bgcolor.RED}error{Bgcolor.DEFAULT}: the following required arguments were not provided: 18 | --dorks 19 | 20 | usage: godork --dorks 21 | 22 | For more information, try 'godork --help'""") 23 | return 24 | 25 | scrape = Scraper( 26 | dorks=args.dorks, 27 | proxy=args.proxy, 28 | debug=args.debug, 29 | retries=args.retries, 30 | max_retries=args.max_retries, 31 | headless_mode=args.no_headless 32 | ) 33 | 34 | try: 35 | asyncio.run(scrape.run_with_async()) 36 | except KeyboardInterrupt: 37 | 
print(f"\r{Console().text_format('info', msg='We appreciate your use of our tool ;) Goodbye!')}") 38 | 39 | if __name__ == '__main__': 40 | main() -------------------------------------------------------------------------------- /src/godork/utils/parse.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse, parse_qs, unquote 2 | 3 | def get_page_num(url): 4 | """ 5 | This function handles URL parsing, extracts query parameters, gets their values and returns the value 6 | """ 7 | 8 | parsed = urlparse(unquote(url)) 9 | query_params = parse_qs(parsed.query) 10 | 11 | return query_params["start"][0] 12 | 13 | def get_query(url): 14 | """ 15 | This function handles URL parsing, extracts query parameters, gets their values and returns the value 16 | """ 17 | 18 | query_params = parse_qs(urlparse(unquote(str(url))).query) 19 | 20 | try: 21 | query_params = parse_qs(urlparse(query_params["continue"][0]).query) 22 | return query_params["q"][0] 23 | except KeyError: 24 | return query_params["q"][0] 25 | 26 | def set_page_num(num): 27 | """ 28 | This is where the page data is set. 29 | """ 30 | 31 | return int(num) // 10 + 1 32 | 33 | def no_data(data_title): 34 | """ 35 | This function checks if the desired data is not present and returns a boolean value 36 | """ 37 | 38 | try: 39 | return len(data_title) < 1 40 | except: 41 | return False -------------------------------------------------------------------------------- /src/godork/utils/banner.py: -------------------------------------------------------------------------------- 1 | """ 2 | The print_banner function is a simple yet visually impactful component designed to display a startup banner when the program is launched. 3 | It provides users with immediate version information and the current status of the tool in an aesthetically styled format using ASCII art. 
4 | 5 | Purpose: 6 | 7 | * The function is primarily used to enhance user experience by visually indicating: 8 | * The tool's name or identity (in this case, associated with thd3r & societyprojects) 9 | * The current version of the tool (CURRENT_VERSION) 10 | * The status of the tool (e.g., latest, outdated, or other custom labels) 11 | 12 | This shows a banner when the program starts. 13 | 14 | """ 15 | 16 | def print_banner(status, version): 17 | banner = rf""" 18 | __ __ 19 | ___ ____ ___/ /__ ____/ /__ 20 | / _ `/ _ \/ _ / _ \/ __/ '_/ {version} 21 | \_, /\___/\_,_/\___/_/ /_/\_\ {status} 22 | /___/ 23 | thd3r & societyprojects 24 | """ 25 | print(banner) 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from setuptools import setup, find_packages 4 | 5 | version = re.search( 6 | r'^CURRENT_VERSION\s*=\s*"(.*)"', 7 | open('src/godork/services/version.py').read(), 8 | re.M 9 | ).group(1) 10 | 11 | setup( 12 | name='godork', 13 | version=version, 14 | author='Thunder (@thd3r)', 15 | author_email='thd3r@proton.me', 16 | description='Advanced & Fast Google Dorking Tool', 17 | packages=find_packages(where='src'), 18 | package_dir={'godork': 'src/godork'}, 19 | install_requires=[ 20 | 'bs4', 21 | 'rich', 22 | 'pydub', 23 | 'psutil', 24 | 'aiohttp', 25 | 'asyncio', 26 | 'selenium', 27 | 'setuptools', 28 | 'SpeechRecognition', 29 | 'webdriver-manager', 30 | 'undetected-chromedriver', 31 | ], 32 | entry_points={ 33 | 'console_scripts': [ 34 | 'godork = godork.godork:main' 35 | ] 36 | }, 37 | license='MIT', 38 | url='https://github.com/thd3r/godork', 39 | long_description=open('README.md').read(), 40 | long_description_content_type='text/markdown', 41 | keywords=['godork', 'google dorks', 'google dorking'], 42 | classifiers=( 43 | 'Development Status :: 4 - Beta', 44 | 'Natural Language :: English', 45 | 'Programming 
Language :: Python :: 3', 46 | ) 47 | ) 48 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use a slim base image 2 | FROM python:3.12-slim 3 | 4 | # Set environment variables 5 | ENV PYTHONUNBUFFERED=1 \ 6 | PIP_NO_CACHE_DIR=1 \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=1 8 | 9 | # Install system dependencies 10 | RUN apt-get update && apt-get install -y \ 11 | git \ 12 | wget \ 13 | ffmpeg \ 14 | unzip \ 15 | && apt-get clean \ 16 | && python -m pip install --upgrade pip 17 | 18 | # Set working directory 19 | WORKDIR /app 20 | 21 | # Download Chrome browser and install 22 | RUN wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \ 23 | && apt -f install \ 24 | && apt-get install ./google-chrome-stable_current_amd64.deb -y 25 | 26 | # Take the Chrome version and put it through the files 27 | RUN google-chrome --version | cut -d ' ' -f3 | while read -r line; do echo $line > /tmp/google-version.txt; done 28 | 29 | # Download chromedriver based on Chrome browser version 30 | RUN cat /tmp/google-version.txt | while read -r version; do wget https://storage.googleapis.com/chrome-for-testing-public/$version/linux64/chromedriver-linux64.zip; done 31 | 32 | # Extract chromedriver and move the path 33 | RUN unzip /app/chromedriver-linux64.zip && cp /app/chromedriver-linux64/chromedriver /usr/bin 34 | 35 | # Remove tracks 36 | RUN rm /app/google-chrome-stable_current_amd64.deb && rm -rf /app/chromedriver-linux64 37 | 38 | # Copy the entire project 39 | COPY . 
/app/ 40 | 41 | # Install dependencies using pip 42 | RUN pip install -r requirements.txt 43 | 44 | # Install the godork tool 45 | RUN python setup.py install 46 | 47 | # Set entrypoint 48 | ENTRYPOINT ["godork"] 49 | -------------------------------------------------------------------------------- /src/godork/utils/user_agents.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | USER_AGENTS = [ 4 | "Mozilla/5.0 (Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0", 5 | "Mozilla/5.0 (Linux x86_64; rv:96.0) Gecko/20100101 Firefox/96.0", 6 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:57.0) Gecko/20100101 Firefox/57.0", 7 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36", 8 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 12.1; rv:91.0) Gecko/20100101 Firefox/91.0", 9 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 12.1; rv:96.0) Gecko/20100101 Firefox/96.0", 10 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36", 11 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36", 12 | "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36", 13 | "Mozilla/5.0 (Windows NT 10.0; WOW64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36 OPR/83.0.4254.16", 14 | "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0", 15 | "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0", 16 | "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:96.0) Gecko/20100101 Firefox/96.0", 17 | "Mozilla/5.0 (X11; Linux i686; rv:91.0) Gecko/20100101 Firefox/91.0", 18 | "Mozilla/5.0 (X11; Linux i686; rv:96.0) Gecko/20100101 Firefox/96.0", 19 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 
Safari/537.36", 20 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36", 21 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36", 22 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36", 23 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36 OPR/83.0.4254.16", 24 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/63.0.3239.84 Chrome/63.0.3239.84 Safari/537.36", 25 | "Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0", 26 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0", 27 | "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:96.0) Gecko/20100101 Firefox/96.0" 28 | ] 29 | 30 | random_agent = random.choice(USER_AGENTS) -------------------------------------------------------------------------------- /src/godork/helpers/options.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | class OptionParser: 4 | 5 | """ 6 | The OptionParser class is a lightweight, static utility that serves as the command-line interface (CLI) parser for the GoDork tool. 7 | It leverages Python's built-in argparse module to handle user input from the terminal, making the tool both flexible and easy to configure. 8 | 9 | Purpose: 10 | 11 | * This class is responsible for: 12 | 13 | - Defining all supported command-line options 14 | - Validating and parsing input from the terminal 15 | - Returning the parsed arguments for use within the application 16 | 17 | Key Features: 18 | 19 | 1. argument_parser() (static method) 20 | 21 | * This is the core method of the class. 22 | It creates and configures an ArgumentParser instance with several options to tailor the scraping behavior. 
23 | * The method returns a parsed Namespace object that contains all user-specified or default values. This object is then used throughout the tool to control execution flow and feature toggles. 24 | 25 | """ 26 | 27 | @staticmethod 28 | def argument_parser(): 29 | parser = argparse.ArgumentParser( 30 | prog="godork", 31 | usage="%(prog)s [OPTIONS] " 32 | ) 33 | parser.add_argument( 34 | "-v", 35 | "--version", 36 | action="version", 37 | version=f"%(prog)s 2.0.5", 38 | ) 39 | parser.add_argument( 40 | "-d", 41 | "--dorks", 42 | action="store", 43 | default="", 44 | help="single dork or file containing multiple dorks" 45 | ) 46 | parser.add_argument( 47 | "-p", 48 | "--proxy", 49 | action="store", 50 | help="http proxy to use with godork (e.g. http://127.0.0.1:8080)" 51 | ) 52 | parser.add_argument( 53 | "--retries", 54 | type=int, 55 | action="store", 56 | default=40, 57 | help="retries when request is blocked (default: 40)" 58 | ) 59 | parser.add_argument( 60 | "--max-retries", 61 | type=int, 62 | action="store", 63 | default=2, 64 | help="max attempts to bypass protection mechanisms (default: 2)" 65 | ) 66 | parser.add_argument( 67 | "--debug", 68 | action="store_true", 69 | default=False, 70 | help="show detailed logs and error for debugging" 71 | ) 72 | parser.add_argument( 73 | "--no-headless", 74 | action="store_false", 75 | default=True, 76 | help="run in graphical mode when bypassing" 77 | ) 78 | 79 | return parser.parse_args() -------------------------------------------------------------------------------- /src/godork/services/driver.py: -------------------------------------------------------------------------------- 1 | import undetected_chromedriver as uc 2 | 3 | from ..utils.user_agents import random_agent 4 | 5 | from webdriver_manager.chrome import ChromeDriverManager 6 | from selenium.webdriver import ChromeService 7 | 8 | CHROME_DRIVER_PATH = ChromeDriverManager().install() 9 | 10 | class SeleniumDriver: 11 | 12 | """ 13 | The SeleniumDriver class 
is designed to manage the creation and configuration of a Selenium WebDriver instance for automated web browsing. 14 | It utilizes undetected-chromedriver (uc) to handle interactions with Chrome in a way that minimizes the chance of detection by websites using anti-bot mechanisms. 15 | 16 | Key Features: 17 | 18 | 1. Initialization (__init__): 19 | 20 | * The class accepts a headless_mode argument that determines whether the browser will run in headless mode (without a visible UI). 21 | * Initializes a driver attribute set to None at the start. 22 | 23 | 2. Context Manager (__enter__): 24 | 25 | * When entering the context (via a with statement), the class configures the Chrome browser by setting up ChromeService with the driver path that ChromeDriverManager().install() returned when this module was imported (CHROME_DRIVER_PATH). 26 | * Configures the Chrome options for the WebDriver: 27 | 28 | - Disables automation flags to avoid detection (--disable-blink-features=AutomationControlled). 29 | - Disables unnecessary features like extensions and GPU usage for better performance. 30 | - Sets a custom user-agent string (likely to simulate a real browser environment). 31 | - Optionally enables headless mode based on the headless_mode flag. 32 | - Creates a Chrome WebDriver instance (uc.Chrome), applies the configurations, and sets a page load timeout of 10 seconds. 33 | - Returns the WebDriver instance for use within the with block. 34 | 35 | 3. Exit (__exit__): 36 | 37 | * The __exit__ method performs cleanup when leaving the context. 38 | If a driver was created in __enter__, it calls quit() on it so the browser session is closed; otherwise it does nothing. 
39 | 40 | """ 41 | 42 | def __init__(self, headless_mode:bool): 43 | self.headless = headless_mode 44 | self.driver = None 45 | 46 | def __enter__(self): 47 | chrome_service = ChromeService(CHROME_DRIVER_PATH) 48 | 49 | options = uc.ChromeOptions() 50 | options.add_argument("--disable-blink-features=AutomationControlled") 51 | options.add_argument("--disable-extensions") 52 | options.add_argument("--disable-gpu") 53 | options.add_argument("--disable-dev-shm-usage") 54 | options.add_argument("--no-sandbox") 55 | options.add_argument(f"--user-agent={random_agent}") 56 | 57 | if self.headless: 58 | options.add_argument("--headless=new") 59 | 60 | self.driver = uc.Chrome(service=chrome_service, options=options) 61 | self.driver.set_page_load_timeout(10) 62 | return self.driver 63 | 64 | def __exit__(self, exc_type, exc_val, exc_tb): 65 | if self.driver: 66 | self.driver.quit() 67 | -------------------------------------------------------------------------------- /src/godork/helpers/extractor.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from ..utils.colors import Bgcolor 4 | from ..utils.parse import no_data 5 | from .console import Console 6 | from ..utils.exceptions import GodorkNoData 7 | 8 | from urllib.parse import urlparse, unquote 9 | from datetime import datetime 10 | from bs4 import BeautifulSoup 11 | 12 | def extract_pages(html): 13 | """ 14 | This function will use a pattern to extract each available page and will return the last page. 15 | """ 16 | 17 | pages = re.findall(r'aria-label=\"Page ([0-9]+)\"', html) 18 | return pages[-1] 19 | 20 | def extract_title(html): 21 | """ 22 | This function extracts each title based on the

tag and adds the title data to a list. It then returns a list containing the title content. 23 | """ 24 | 25 | data_title = [] 26 | soup = BeautifulSoup(html, "html.parser") 27 | 28 | for title in soup.find_all("h3"): 29 | if not re.search("Google Search Console|Google Search", title.getText()): 30 | data_title.append(title.getText().strip()) 31 | 32 | return data_title 33 | 34 | def extract_link(text): 35 | """ 36 | This function extracts all available links from the search results by applying various patterns to assist in the extraction. 37 | It also checks if a domain is part of the excluded domains list. The function returns a list of links. 38 | """ 39 | 40 | data_links = [] 41 | exclude_domains = re.findall(r'https?://([a-zA-Z0-9\-.]+\.google\.com)', text) 42 | 43 | pattern = re.compile( 44 | r'\"> 0 and len(data_links) > 0: 72 | reports.logs_report("info", data=f"Found {len(data_title)} title and {len(data_links)} links on page {num_page}") 73 | Console().log_print("info", msg=f"Found {len(data_title)} title and {len(data_links)} links on page {num_page}") 74 | 75 | reports.json_report({ 76 | "timestamp": str(datetime.now()), 77 | "query": query, 78 | "page": num_page, 79 | "size_page": len(html), 80 | "data_output": { 81 | "title": data_title, 82 | "links": data_links 83 | }, 84 | }) 85 | 86 | for i, title in enumerate(data_title): 87 | try: 88 | print(f"{title} [{Bgcolor.GREEN}{data_links[i]}{Bgcolor.DEFAULT}]") 89 | except IndexError: 90 | pass -------------------------------------------------------------------------------- /src/godork/helpers/reports.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from datetime import datetime 5 | from .console import Console 6 | 7 | class Reports: 8 | 9 | """ 10 | The Reports class is a centralized utility designed to manage logging and reporting for the GoDork tool. 
11 | Its primary role is to handle output storage by writing execution logs and structured JSON results to organized directories, ensuring traceability and easy analysis of scraping sessions. 12 | 13 | Purpose: 14 | 15 | * This class automates the creation, formatting, and saving of: 16 | 17 | - Log files in plain text (human-readable) 18 | - JSON reports for structured, machine-readable data 19 | - Organized directories for persistent reporting 20 | 21 | Key Features: 22 | 23 | 1. Initialization (__init__) 24 | 25 | * Upon initialization: 26 | 27 | - Determines the appropriate temp directory (Windows or Unix-based systems) 28 | - Sets up paths for logs and JSON reports using timestamps 29 | - Automatically creates required directories (logs and json) under /tmp/godork/reports (or %TEMP%/godork/reports on Windows) 30 | - Initializes the Console utility for consistent and colored terminal output 31 | 32 | 2. write_file_json(filename, data) 33 | 34 | * Appends structured data to a JSON file. Ideal for storing detailed metadata or search results. 35 | 36 | 3. write_file_text(filename, data) 37 | 38 | * Appends plain text to a given file. Primarily used for saving logs and console-style outputs. 39 | 40 | 4. logs_report(status, data) 41 | 42 | * Handles writing formatted log entries (with timestamps and status levels like INFO, ERROR, DEBUG) to the log file. Uses the Console class for formatting consistency. 43 | 44 | 5. json_report(data) 45 | 46 | * Writes a JSON entry to the report file. Useful for capturing individual result items in structured form. 47 | 48 | Report Paths: 49 | 50 | * Logs: Saved under reports/logs/ with timestamped filenames. 51 | * JSON: Saved under reports/json/ for structured result data. 52 | 53 | Error Handling: 54 | 55 | * Both logs_report() and json_report() include internal exception handling. If writing to a file fails, an error is printed to the console, ensuring that such failures are visible but non-fatal. 
56 | 57 | """ 58 | 59 | def __init__(self): 60 | self.temp_dir = os.getenv("TEMP") if os.name == "nt" else "/tmp" 61 | self.base_dir = f"{self.temp_dir}/godork/reports" 62 | 63 | self.log_file = f"{self.base_dir}/logs/{str(datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))}_godork.log" 64 | self.json_file = f"{self.base_dir}/json/{str(datetime.now().strftime('%Y-%m-%d-%H:%M:%S'))}_godork.json" 65 | 66 | self.console = Console() 67 | 68 | try: 69 | os.makedirs(f"{self.base_dir}/logs") 70 | os.makedirs(f"{self.base_dir}/json") 71 | except FileExistsError: 72 | self.base_dir = self.base_dir 73 | 74 | def write_file_json(self, filename, data): 75 | with open(filename, "at") as f: 76 | try: 77 | f.write(json.dumps(data, indent=4, ensure_ascii=False) + os.linesep) 78 | finally: 79 | f.close() 80 | 81 | def write_file_text(self, filename, data): 82 | with open(filename, "at") as f: 83 | try: 84 | f.write(str(data) + os.linesep) 85 | finally: 86 | f.close() 87 | 88 | def logs_report(self, status, data): 89 | try: 90 | self.write_file_text(self.log_file, data=self.console.out_log_format(status, msg=data)) 91 | except Exception as err: 92 | self.console.log_print("error", msg=err) 93 | 94 | def json_report(self, data): 95 | try: 96 | self.write_file_json(self.json_file, data=data) 97 | except Exception as err: 98 | self.console.log_print("error", msg=err) -------------------------------------------------------------------------------- /src/godork/services/version.py: -------------------------------------------------------------------------------- 1 | import json 2 | import requests 3 | 4 | from .requester import Requester 5 | from ..utils.banner import print_banner 6 | from ..utils.colors import Bgcolor 7 | 8 | """ 9 | The release_version() function is a simple yet effective mechanism for retrieving the latest release version of a tool from GitHub. 
10 | It ensures that users are informed about the availability of newer versions and provides a fallback in case the release information cannot be fetched. 11 | This function is crucial for keeping the tool up to date, offering smooth integration with GitHub's release system and preventing potential downtime or errors caused by outdated versions. 12 | 13 | Key Elements: 14 | 15 | 1. CURRENT_VERSION: 16 | 17 | * This constant represents the current, locally installed version of the software tool. 18 | * This version is used as a fallback in the event that the latest release cannot be fetched from the GitHub repository. 19 | 20 | 2. release_version() Function: 21 | 22 | * This function fetches the latest release version of the "godork" tool from its official GitHub repository. 23 | * It performs the following steps: 24 | 25 | - Creating a Session: A session is created using requests.session() to enable persistent connections, allowing efficient HTTP requests. 26 | - Making a GET Request: The function sends a GET request, via Requester().reqwest with a 10-second timeout, to GitHub's API endpoint that provides details about the latest release (https://api.github.com/repos/thd3r/godork/releases/latest). 27 | 28 | * Handling the Response: 29 | 30 | - If the request is successful, the response is expected to be in JSON format. The json.loads() function is used to parse the response body. 31 | - The parsed data contains various details about the latest release, including the tag name (which represents the version) and the release notes (body). These are returned from the function. 32 | 33 | * Error Handling: 34 | 35 | - If the request fails for any reason (e.g., network issues, API issues), 36 | the function catches the exception and returns the current local version (CURRENT_VERSION) along with None for the release notes. 
37 | 38 | Key Points: 39 | 40 | * GitHub API Integration: The function leverages GitHub's API to fetch the latest release information for the "godork" tool, ensuring that users can easily stay up to date with the latest version. 41 | * Fallback Mechanism: If there are any issues fetching the release version, the current local version is returned to avoid errors in the application. 42 | * Error Handling: The use of a try-except block ensures that even if the request to the GitHub API fails, the program will continue running without crashing, and the user will receive information about the current version of the software. 43 | 44 | Usage Scenario: 45 | 46 | This function is typically used as part of a larger update management system, where it can be called to check whether a new release is available for the software. 47 | If a newer version is found, it can trigger an update process, or if no update is found, the system can reassure the user that they are already using the latest version. 
48 | 49 | """ 50 | 51 | CURRENT_VERSION = "v2.6.2" 52 | 53 | def check_version(): 54 | release_vers, _ = release_version() 55 | if release_vers is not None and CURRENT_VERSION < release_vers: 56 | print_banner(status=f"{Bgcolor.RED}outdated{Bgcolor.DEFAULT}", version=CURRENT_VERSION) 57 | if release_vers is not None and CURRENT_VERSION == release_vers: 58 | print_banner(status=f"{Bgcolor.GREEN}latest{Bgcolor.DEFAULT}", version=CURRENT_VERSION) 59 | if release_vers is None: 60 | print_banner(status=f"{Bgcolor.RED}outdated{Bgcolor.DEFAULT}", version=CURRENT_VERSION) 61 | 62 | def release_version(): 63 | session = requests.session() 64 | try: 65 | response = Requester().reqwest(session, "GET", url="https://api.github.com/repos/thd3r/godork/releases/latest", timeout=10) 66 | data_json = json.loads(response.text) 67 | return data_json["tag_name"], data_json["body"] 68 | except: 69 | return CURRENT_VERSION, None -------------------------------------------------------------------------------- /src/godork/services/requester.py: -------------------------------------------------------------------------------- 1 | from ..utils.user_agents import random_agent 2 | 3 | class Requester: 4 | 5 | """ 6 | The Requester class is a Python utility designed for making HTTP requests with customizable options for both synchronous and asynchronous operations. 7 | It simplifies sending requests with custom headers, proxies, cookies, and additional parameters, while also handling both standard and asynchronous HTTP methods. 8 | 9 | Key Features: 10 | 11 | 1. Initialization (__init__): 12 | 13 | * The class initializes a default set of HTTP headers, including a User-Agent string (which is randomly chosen), Accept, Accept-Language, and Referer. 14 | These headers are typically used to simulate real user traffic, helping to avoid detection by web servers or bot protection mechanisms. 15 | * It also initializes an empty dictionary, response_dict, to store response content asynchronously. 
16 | 17 | 2. HTTP Request (Synchronous) - reqwest: 18 | 19 | * This method sends a synchronous HTTP request using the session.request() function from the requests library. 20 | * It takes various arguments: 21 | 22 | - method: The HTTP method (e.g., GET, POST). 23 | - url: The target URL for the request. 24 | - Additional keyword arguments (kwargs) include optional parameters such as proxies, request parameters (params), timeouts, cookies, custom headers, and the ability to allow redirects. 25 | 26 | * The method sends the request and returns the response object. 27 | * This method is ideal for situations where blocking operations (synchronous requests) are acceptable. 28 | 29 | 3. HTTP Request (Asynchronous) - aioreqwest: 30 | 31 | * This method allows for asynchronous HTTP requests using aiohttp, making it more suitable for high-performance web scraping or API interactions that require non-blocking calls. 32 | * The function accepts similar parameters as the synchronous version, with the main difference being the use of async with to handle the asynchronous nature of the request. 33 | * Upon receiving the response, it awaits response.text() inside the async with block to read the body content of the response. 34 | The function then returns a (response, body) tuple, so callers receive the body text together with the response object. 35 | * This method is ideal for situations requiring non-blocking I/O operations, such as when dealing with large-scale web scraping or API calls. 36 | 37 | The class leverages both requests for traditional synchronous requests and aiohttp for asynchronous tasks, offering flexibility depending on the needs of the application. 
38 | 39 | """ 40 | 41 | def __init__(self): 42 | self.headers = { 43 | "User-Agent": str(random_agent), 44 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 45 | "Accept-Language": "en-US,en;q=0.5", 46 | "Referer": "https://www.google.com/", 47 | } 48 | 49 | def reqwest(self, session, method, url, **kwargs): 50 | response = session.request( 51 | method=method, 52 | url=url, 53 | proxies=kwargs.get("proxy"), 54 | params=kwargs.get("params"), 55 | timeout=kwargs.get("timeout"), 56 | cookies=kwargs.get("cookies"), 57 | headers=self.headers if not kwargs.get("headers") else kwargs.get("headers"), 58 | allow_redirects=kwargs.get("redirects") 59 | ) 60 | return response 61 | 62 | async def aioreqwest(self, session, method, url, **kwargs): 63 | async with session.request( 64 | method=method, 65 | url=url, 66 | proxy=kwargs.get("proxy"), 67 | params=kwargs.get("params"), 68 | timeout=kwargs.get("timeout"), 69 | cookies=kwargs.get("cookies"), 70 | headers=self.headers if not kwargs.get("headers") else kwargs.get("headers"), 71 | allow_redirects=kwargs.get("redirects") 72 | ) as response: 73 | body = await response.text() 74 | return response, body -------------------------------------------------------------------------------- /src/godork/helpers/console.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from ..utils.colors import Bgcolor 4 | 5 | class Console: 6 | 7 | """ 8 | The Console class provides a lightweight and extensible logging utility for managing output in the terminal with structured formatting, colors, and real-time timestamps. 9 | It's designed to enhance the developer and user experience by improving readability, debugging, and log categorization during runtime. 10 | 11 | Purpose: 12 | 13 | * This class handles all console-related operations for: 14 | 15 | - Structured logging with colored labels (INFO, ERROR, DEBUG, etc.) 
class Console:

    """
    Terminal logging helper that prints colored, labeled messages.

    Supported status values (case-insensitive) and their labels:

        - info    -> INFO
        - error   -> EROR
        - debug   -> DBUG
        - warning -> WARN

    Key Features:

    1. debugging(self, debug, msg)

        * Prints a gray "debug" log line only when the debug flag is truthy.

    2. log_print(self, status, msg)

        * Prints the message formatted by out_log_format().

    3. out_log_format(self, status, msg)

        * Returns the message prefixed with the current local time and the status label.

    4. text_format(self, status, msg)

        * Returns the message prefixed with the status label only (no timestamp).

    Error Handling:

        * An unsupported status makes out_log_format() and text_format() print an
          error line and terminate the program with exit status 1.

    """

    def debugging(self, debug, msg):
        """Print msg as a debug log line when debug is truthy."""
        if debug:
            self.log_print("debug", msg=f"{Bgcolor.GRAY}{msg}{Bgcolor.DEFAULT}")

    def log_print(self, status, msg):
        """Print msg as a timestamped log line of the given status."""
        print(self.out_log_format(status, msg))

    def _status_tag(self, status, msg):
        """Return the colored "[LABEL]" tag for status, or exit on an unknown one."""
        # Built per call so the color constants are only looked up when used.
        labels = {
            "info": (Bgcolor.BLUE, "INFO"),
            "error": (Bgcolor.RED, "EROR"),
            "debug": (Bgcolor.PURPLE, "DBUG"),
            "warning": (Bgcolor.WARNING, "WARN"),
        }

        entry = labels.get(status.lower())
        if entry is None:
            self.log_print(status="error", msg="status=REQUIRED args required with msg:{}".format(msg))
            # SystemExit instead of exit(): exit() is injected by the site
            # module for interactive use and may be missing in programs.
            raise SystemExit(1)

        color, label = entry
        return f"[{color}{label}{Bgcolor.DEFAULT}]"

    def out_log_format(self, status, msg):
        """Return "[time] [LABEL] msg" for the given status."""
        log_time = str(datetime.now().strftime('%Y/%m/%d %H:%M:%S'))
        tag = self._status_tag(status, msg)

        return f"[{Bgcolor.CYAN}{log_time}{Bgcolor.DEFAULT}] {tag} {msg}"

    def text_format(self, status, msg):
        """Return "[LABEL] msg" for the given status, without a timestamp."""
        tag = self._status_tag(status, msg)

        return f"{tag} {msg}"
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 91 | __pypackages__/ 92 | 93 | # Celery stuff 94 | celerybeat-schedule 95 | celerybeat.pid 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .dmypy.json 122 | dmypy.json 123 | 124 | # Scripts 125 | server/scripts/ 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | 130 | # pytype static type analyzer 131 | .pytype/ 132 | 133 | # Cython debug symbols 134 | cython_debug/ 135 | 136 | # Ruff stuff: 137 | .ruff_cache/ 138 | 139 | # PyPI configuration file 140 | .pypirc 141 | 142 | # Conda 143 | .conda/ 144 | 145 | # Local environment 146 | .env.local 147 | 148 | # macOS DS_Store 149 | .DS_Store 150 | 151 | weights/ 152 | weights/icon_detect/ 153 | weights/icon_detect/model.pt 154 | weights/icon_detect/model.pt.zip 155 | weights/icon_detect/model.pt.zip.part* 156 | 157 | libs/omniparser/weights/icon_detect/model.pt 158 | 159 | # Example test data and output 160 | examples/test_data/ 161 | examples/output/ 162 | 163 | /screenshots/ 164 | 165 | /experiments/ 166 | 167 | /logs/ 168 | 169 | # Xcode 170 | # 171 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 172 | 173 | ## User settings 174 | xcuserdata/ 175 | 176 | ## Obj-C/Swift specific 177 | *.hmap 178 | 179 | ## App packaging 180 | *.ipa 181 | *.dSYM.zip 182 | *.dSYM 183 | 184 | ## Playgrounds 185 | timeline.xctimeline 186 | playground.xcworkspace 187 | 188 | # Swift Package Manager 189 | # 190 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 
191 | # Packages/ 192 | # Package.pins 193 | # Package.resolved 194 | # *.xcodeproj 195 | # 196 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata 197 | # hence it is not needed unless you have added a package configuration file to your project 198 | .swiftpm/ 199 | .build/ 200 | 201 | # CocoaPods 202 | # 203 | # We recommend against adding the Pods directory to your .gitignore. However 204 | # you should judge for yourself, the pros and cons are mentioned at: 205 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 206 | # 207 | # Pods/ 208 | # 209 | # Add this line if you want to avoid checking in source code from the Xcode workspace 210 | # *.xcworkspace 211 | 212 | # Carthage 213 | # 214 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 215 | # Carthage/Checkouts 216 | Carthage/Build/ 217 | 218 | # fastlane 219 | # 220 | # It is recommended to not store the screenshots in the git repo. 221 | # Instead, use fastlane to re-generate the screenshots whenever they are needed. 222 | # For more information about the recommended setup visit: 223 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 224 | fastlane/report.xml 225 | fastlane/Preview.html 226 | fastlane/screenshots/**/*.png 227 | fastlane/test_output 228 | 229 | # Ignore folder 230 | ignore 231 | 232 | # .release 233 | .release/ 234 | 235 | # Shared folder 236 | shared 237 | 238 | # Trajectories 239 | trajectories/ 240 | 241 | # Installation ID Storage 242 | .storage/ 243 | 244 | # Gradio settings 245 | .gradio_settings.json 246 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Godork - Advanced & Fast Google Dorking Tool 3 |

4 | 5 |
6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | ```sh 14 | __ __ 15 | ___ ____ ___/ /__ ____/ /__ 16 | / _ `/ _ \/ _ / _ \/ __/ '_/ v2.6.2 17 | \_, /\___/\_,_/\___/_/ /_/\_\ latest 18 | /___/ 19 | thd3r & societyprojects 20 | ``` 21 | 22 | **Godork** is a high-performance tool designed to scrape links and titles from Google search results using the [asyncio](https://docs.python.org/3/library/asyncio.html) library, which enables efficient cooperative multitasking. Combined with [aiohttp](https://docs.aiohttp.org), this tool allows you to quickly and reliably extract URLs along with their corresponding titles. Additionally, Godork is capable of bypassing restrictions imposed by network providers, ensuring uninterrupted access to search data 23 | 24 | ## ✨ Why Godork? 25 | 26 | * ⚡ Blazing-fast performance using asynchronous HTTP requests (aiohttp) 27 | 28 | * 🔍 Automated dork execution with support for lists, batches, and single queries 29 | 30 | * 🌐 Proxy-ready: Bypass restrictions and stay anonymous with HTTP proxy integration 31 | 32 | * 🕶️ Headless browser mode with Selenium to defeat CAPTCHAs and JS-based blocks 33 | 34 | * 🐳 Docker-compatible: Seamlessly containerize and deploy in any environment 35 | 36 | ## Resources 37 | - [Requirements](#requirements) 38 | - [Installation](#installation) 39 | - [Install with pip](#install-with-pip) 40 | - [Options](#options) 41 | - [Example Usage](#example-usage) 42 | - [Basic dorking](#basic-dorking) 43 | - [Batch mode](#batch-mode) 44 | - [Help & Bugs](#help--bugs) 45 | - [Contributors](#contributors-heart) 46 | - [License](#license) 47 | - [Support](#support) 48 | 49 | 50 | ## Requirements 51 | 52 | ```sh 53 | # This is required for the pydub library 54 | $ sudo apt install ffmpeg 55 | 56 | # Check the version of the google-chrome browser 57 | $ google-chrome --version 58 | 59 | # If the browser version does not exist run this command 60 | $ wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb 61 | $ sudo apt -f install 62 | $ sudo 
dpkg -i google-chrome-stable_current_amd64.deb 63 | 64 | # After that, take the version from your google-chrome browser and place it here 65 | $ wget https://storage.googleapis.com/chrome-for-testing-public/{PUT_THAT_VERSION_HERE}/linux64/chromedriver-linux64.zip 66 | $ unzip chromedriver-linux64.zip 67 | $ cd chromedriver-linux64 68 | $ sudo mv chromedriver /usr/bin 69 | ``` 70 | 71 | ## Installation 72 | 73 | **Godork** requires **python 3.8** or higher to install successfully 74 | 75 | ### Install with pip: 76 | 77 | ```sh 78 | pip install godork 79 | ``` 80 | 81 | ## Options 82 | 83 | | Option | Type | Description | 84 | |-------------------|--------------|------------------------------------------------| 85 | | -v, --version | flag | displays the current version of godork | 86 | | -d, --dorks | string | single dork or file containing multiple dorks | 87 | | -p, --proxy | string | http proxy to use with godork (e.g. http://127.0.0.1:8080) | 88 | | --retries | integer | retries when request is blocked (default: 40) | 89 | | --max-retries | integer | max attempts to bypass protection mechanisms (default: 2) | 90 | | --debug | boolean | show detailed logs and error for debugging | 91 | | --no-headless | boolean | run in graphical mode when bypassing | 92 | 93 | ## Example Usage 94 | 95 | ### Basic dorking: 96 | 97 | ```sh 98 | godork --dorks "intitle:index.of site:example.com" 99 | ``` 100 | 101 | > [!WARNING] 102 | > Developers assume no liability and are not responsible for any issue or damage. 103 | 104 | ### Batch mode: 105 | 106 | ```sh 107 | godork --dorks dorks.txt --proxy http://127.0.0.1:8080 --no-headless 108 | ``` 109 | 110 | ## Help & Bugs 111 | 112 | If you are still confused or found a bug, please [open the issue](https://github.com/thd3r/godork/issues). All bug reports are appreciated, some features have not been tested yet due to lack of free time. 113 | 114 | ## Contributors :heart: 115 | 116 |

117 | 118 |

119 | 120 | ## License 121 | 122 | Licensed under the [MIT License](https://github.com/thd3r/godork/blob/main/LICENSE.md). 123 | 124 | Contributions are welcome :) feel free to fork, suggest improvements, or submit pull requests. 125 | 126 | ## Support 127 | 128 | Buy Me A Coffee 129 | -------------------------------------------------------------------------------- /src/godork/services/recaptcha.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pydub 4 | import urllib 5 | import asyncio 6 | import speech_recognition 7 | 8 | from tempfile import gettempdir 9 | from datetime import datetime 10 | 11 | from ..helpers.console import Console 12 | from ..helpers.reports import Reports 13 | from ..utils.colors import Bgcolor 14 | from ..utils.exceptions import GodorkException, GodorkTimeout 15 | 16 | from selenium.webdriver.common.by import By 17 | from selenium.webdriver.common.keys import Keys 18 | from selenium.webdriver.support.ui import WebDriverWait 19 | from selenium.webdriver.support import expected_conditions as EC 20 | from selenium.common.exceptions import TimeoutException, NoSuchElementException 21 | 22 | class RecaptchaBypass: 23 | 24 | """ 25 | The RecaptchaBypass class is an advanced solution for bypassing reCAPTCHA v2 challenges, specifically designed for handling audio-based CAPTCHAs. 26 | Using Selenium, undetected-chromedriver, and various media-processing libraries, it automates the process of solving the CAPTCHA by downloading, converting, and transcribing audio challenges. 27 | The class also incorporates detailed logging, debugging, and error handling to ensure smooth and efficient operation, even in environments with strict bot protection. 28 | 29 | Key Features: 30 | 31 | 1. 
class RecaptchaBypass:

    """
    Solves a reCAPTCHA v2 challenge through its audio challenge, driving a
    Selenium driver that is supplied by the caller.

    Key Features:

    1. Initialization (__init__):

        * Stores the debug and headless_mode flags and creates the Console and Reports helpers.
        * The WebDriverWait used by the other methods is created in solve_captcha().

    2. reCAPTCHA Handling (recaptcha_service):

        * Performs these steps in sequence:

            - Switches to the reCAPTCHA iframe and clicks the checkbox.
            - Switches back to the default content.
            - Switches to the challenge iframe and clicks the audio button.
            - Reads the audio source URL.
            - Downloads, converts and transcribes the audio.
            - Types the transcribed phrase and submits it with RETURN.
            - Checks whether the page reports a block before logging success.

    3. Audio CAPTCHA Processing (handle_audio_captcha):

        * Downloads the MP3, converts it to WAV with pydub and transcribes it with speech_recognition.
        * The temporary MP3 and WAV files are removed afterwards, also when a step fails.

    4. Error Handling:

        * Missing elements raise GodorkException, expired waits raise GodorkTimeout,
          and transcription failures raise GodorkException.

    5. Block Detection (is_blocked):

        * Returns True when the block notice element is present or the driver is
          on the Google "sorry" page, logging the reason; otherwise returns False.

    6. Solve CAPTCHA (solve_captcha):

        * Opens the URL in the supplied driver and runs recaptcha_service().

    """

    def __init__(self, debug:bool, headless_mode:bool):
        self.console = Console()
        self.reports = Reports()

        self.debug = debug
        self.headless = headless_mode

        # Set by solve_captcha() once a driver is available.
        self.wait = None

    def _trace(self, message):
        """Write a debug message to the report log and, in debug mode, to the console."""
        self.reports.logs_report("debug", data=message)
        self.console.debugging(self.debug, msg=message)

    async def recaptcha_service(self, driver):
        """
        Run the audio-challenge flow on the page currently loaded in driver.

        Raises:
            GodorkException: an iframe is missing or the audio cannot be transcribed.
            GodorkTimeout: an element did not become available within the wait.
        """
        # Switching to iframe containing reCAPTCHA
        self._trace("Switching to iframe containing reCAPTCHA")

        try:
            iframe_inner = driver.find_element(By.XPATH, "//iframe[@title='reCAPTCHA']")
            driver.switch_to.frame(iframe_inner)
        except NoSuchElementException as exc:
            raise GodorkException("Failed to locate reCAPTCHA iframe element") from exc

        # asyncio.sleep keeps the event loop responsive while pausing.
        await asyncio.sleep(1)

        # Click on the recaptcha
        self._trace("Clicking the reCAPTCHA checkbox")

        try:
            self.wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".rc-anchor-content"))
            ).click()
        except TimeoutException as exc:
            raise GodorkTimeout("Failed to click reCAPTCHA checkbox") from exc

        # Switch back to the default frame
        driver.switch_to.default_content()

        await asyncio.sleep(1)

        # Locating audio challenge iframe
        self._trace("Locating audio challenge iframe")

        try:
            iframe = driver.find_element(By.XPATH, "//iframe[contains(@title, 'recaptcha')]")
            driver.switch_to.frame(iframe)
        except NoSuchElementException as exc:
            raise GodorkException("Failed to locate reCAPTCHA iframe element") from exc

        # Click on the audio button
        self._trace("Clicking the audio button")

        try:
            self.wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, "#recaptcha-audio-button"))
            ).click()
        except TimeoutException as exc:
            raise GodorkTimeout("Failed to click audio button") from exc

        await asyncio.sleep(1)

        # Wait for the audio source to load
        self._trace("Waiting for the audio source to load completely")

        try:
            audio_source = self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#audio-source"))
            )
            src = audio_source.get_attribute("src")
            self._trace(f"Getting the audio URL {src}")
        except TimeoutException as exc:
            raise GodorkTimeout("Failed to load audio source") from exc

        # Download, convert, and decode audio reCAPTCHA
        try:
            key = await self.handle_audio_captcha(src)
        except (speech_recognition.exceptions.UnknownValueError, speech_recognition.exceptions.RequestError) as exc:
            raise GodorkException("Failed to recognize") from exc

        # Input the key
        self._trace("Entering the transcribed phrase")

        try:
            self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#audio-response"))
            ).send_keys(key.lower())
        except TimeoutException as exc:
            raise GodorkTimeout("Failed to input key") from exc

        # Submit the key
        self._trace("Submitting the phrase")

        try:
            self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#audio-response"))
            ).send_keys(Keys.RETURN)
        except TimeoutException as exc:
            raise GodorkTimeout("Failed to submit key") from exc

        # Waiting briefly for reCAPTCHA to process the input
        self._trace("Waiting briefly for reCAPTCHA to process the input")

        await asyncio.sleep(3)

        # is_blocked() has already logged the reason; do not report success.
        if self.is_blocked(driver):
            return

        self.console.log_print("info", msg="Successfully bypassed v2 protection")

    async def handle_audio_captcha(self, src_url):
        """Main handler to download, convert and decode audio CAPTCHA"""
        mp3_path, wav_path = self.get_temp_audio_paths()

        try:
            # Download and conversion are inside the try so that a failure in
            # either step still removes whatever was written to disk.
            self.download_audio(src_url, mp3_path)
            self.convert_mp3_to_wav(mp3_path, wav_path)
            phrase = await self.async_decode_audio(wav_path)
        finally:
            # Delete temporary files
            self._trace("Deleting temporary audio files")
            self.cleanup_temp_files(mp3_path, wav_path)

        return phrase

    def download_audio(self, src, save_path):
        """Download the audio at src to save_path."""
        # "import urllib" alone does not load the request submodule.
        import urllib.request

        self._trace("Downloading the audio to the temp folder")

        urllib.request.urlretrieve(src, save_path)

    def convert_mp3_to_wav(self, mp3_path, wav_path):
        """Convert the MP3 file at mp3_path into a WAV file at wav_path."""
        self._trace("Converting MP3 to WAV format")

        sound = pydub.AudioSegment.from_mp3(mp3_path)
        sound.export(wav_path, format="wav")

    async def async_decode_audio(self, wav_path):
        """Run decode_audio() in the default executor and return its result."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.decode_audio, wav_path)

    def decode_audio(self, wav_path):
        """Transcribe the WAV file with the recognizer's recognize_google()."""
        self._trace("Transcribing the audio content")

        recognizer = speech_recognition.Recognizer()
        with speech_recognition.AudioFile(wav_path) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)

    def cleanup_temp_files(self, *paths):
        """Remove the given files; failures are logged as warnings, never raised."""
        for path in paths:
            try:
                os.remove(path)
            except FileNotFoundError:
                # The file was never created (an earlier step failed); nothing to do.
                continue
            except Exception as e:
                self.reports.logs_report("warning", data=f"Failed to delete {path}: {e}")
                self.console.log_print("warning", f"Failed to delete {path}: {e}")

    def get_temp_audio_paths(self):
        """Return (mp3_path, wav_path) in the temp directory, named after the current time."""
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        mp3 = os.path.join(gettempdir(), f"{timestamp}.mp3")
        wav = os.path.join(gettempdir(), f"{timestamp}.wav")

        return mp3, wav

    def get_text_blocked(self, driver):
        """Return the block notice element, or None when it is not on the page."""
        try:
            recaptcha_header = driver.find_element(By.CLASS_NAME, "rc-doscaptcha-body-text")
            return recaptcha_header
        except NoSuchElementException:
            return None

    def is_blocked(self, driver):
        """
        Report whether the challenge ended in a block.

        Returns:
            True when the block notice is present or the driver is on the
            "sorry" page (each case is logged as an error), otherwise False.
        """
        is_blocked = False

        blocked = self.get_text_blocked(driver)
        if blocked is not None:
            self.reports.logs_report("error", data=f"Failed to bypass v2 protection. IP has been blocked! {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:{blocked.text}")
            self.console.log_print("error", msg=f"Failed to bypass v2 protection. IP has been blocked! {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:{blocked.text}")
            is_blocked = True

        # NOTE(review): exact match only; a "sorry" URL carrying a query
        # string would not be detected - confirm against real redirects.
        if driver.current_url == "https://www.google.com/sorry/index":
            self.reports.logs_report("error", data="Unexpected response comes from search engines")
            self.console.log_print("error", msg="Unexpected response comes from search engines")
            is_blocked = True

        return is_blocked

    async def solve_captcha(self, driver, url):
        """Open url in driver and attempt to solve the reCAPTCHA found there."""
        self._trace(f"Bad URL {url}")

        self.wait = WebDriverWait(driver, 5)

        driver.get(url)
        await self.recaptcha_service(driver)
23 | It provides both synchronous and asynchronous scraping capabilities, ensuring flexibility and robustness while interacting with Google's search engine. 24 | 25 | Key Features: 26 | 27 | 1. Initialization (__init__): 28 | 29 | * The class initializes a number of key parameters like: 30 | 31 | - Dorks: A list of search queries (either from a file or input string). 32 | - Debugging and Proxy Settings: Configuration for debugging and using proxies. 33 | - Retries: Mechanism to retry failed requests with a configurable retry count and maximum retry limit. 34 | - Headless Mode: Configuration to run the scraper in headless mode for browser interactions. 35 | 36 | * The scraper utilizes several components for functionality: 37 | 38 | - Console: For logging and output management. 39 | - Reports: For generating detailed reports about the scraping process. 40 | - Requester: For handling the HTTP requests (both synchronous and asynchronous). 41 | - RecaptchaBypass: A service for bypassing CAPTCHA protections encountered during scraping. 42 | 43 | 2. Parameter Construction (params): 44 | 45 | * This method generates the request parameters for querying Google search results, including: 46 | 47 | - q: The search query. 48 | - client: Randomly selecting a user agent string (chrome, firefox, ubuntu, etc.) 49 | - start: The page number for pagination. 50 | 51 | 3. Asynchronous Connection Handling (reuse_connection): 52 | 53 | * This function manages retries when CAPTCHA protection is triggered on Google search results pages. 54 | * It calls the RecaptchaBypass.solve_captcha method to handle CAPTCHA challenges. 55 | * If CAPTCHA is detected, the method tries to bypass it by interacting with the page's reCAPTCHA service and retries the process for a set number of attempts. 56 | 57 | 4. Fetching URLs (fetch_urls): 58 | 59 | * This method initiates an HTTP GET request to retrieve search result pages. 
60 | * It handles various HTTP response codes: 61 | 62 | - 200 OK: Processes valid search results and extracts data. 63 | - 3xx Redirects: Detects CAPTCHA challenges and attempts to bypass them. 64 | - 4xx and 5xx Errors: Logs client and server errors. 65 | 66 | * It attempts to bypass reCAPTCHA challenges automatically when detected. 67 | * If the request fails, it retries a set number of times before throwing an error. 68 | 69 | 5. Fetching Links (fetch_links): 70 | 71 | * This method handles the core logic of iterating through search queries (dorks) and fetching search result pages. 72 | * For each query, it sends requests to multiple pages (using the params method to adjust the page number) and attempts to extract links and titles from the result. 73 | * It also gracefully handles exceptions such as timeouts and CAPTCHA protection issues, retrying requests when necessary. 74 | 75 | 6. Running the Scraper (run_with_async): 76 | 77 | * This method serves as the entry point for running the asynchronous scraper. 78 | * It starts by printing introductory messages, including warnings about using the scraper responsibly. 79 | * Using async with ClientSession, it establishes a session to interact with Google search, managing retries and exceptions along the way. 80 | * Once the scraping process completes, the session is closed, and the final report is saved. 
81 | 82 | """ 83 | 84 | def __init__(self, dorks, proxy, debug, retries, max_retries, headless_mode): 85 | self.base_url = "https://www.google.com/search" 86 | 87 | self.dorks = dorks.strip().splitlines() if not os.path.isfile(dorks) else open(dorks, 'r').read().strip().splitlines() 88 | self.proxy = proxy 89 | self.debug = debug 90 | self.retries = retries 91 | self.max_retries = max_retries 92 | self.headless = headless_mode 93 | 94 | self.console = Console() 95 | self.reports = Reports() 96 | self.requester = Requester() 97 | self.recaptcha_service = RecaptchaBypass(debug, headless_mode=headless_mode) 98 | 99 | def get_memory_usage(self): 100 | process = psutil.Process(os.getpid()) 101 | print(self.console.text_format("info", msg=f"Memory usage: {process.memory_info().rss / 1024 ** 2:.2f} MB")) 102 | 103 | def params(self, query, page): 104 | return { 105 | "q": query, 106 | "channel": "fs", 107 | "client": random.choice(["chrome", "firefox", "ubuntu", "gws"]), 108 | "start": page, 109 | } 110 | 111 | async def reuse_connection(self, session, url, retry_count=0, **kwargs): 112 | num_page = kwargs.get("num_page") 113 | 114 | if retry_count >= self.max_retries: 115 | raise GodorkMaxRetries("Maximum retries attempts reached for solving v2 protection") 116 | 117 | self.reports.logs_report("info", data="Initiating v2 bypass...") 118 | self.console.log_print("info", msg="Initiating v2 bypass...") 119 | 120 | with SeleniumDriver(headless_mode=self.headless) as driver: 121 | try: 122 | await self.recaptcha_service.solve_captcha(driver, url) 123 | 124 | target_url = driver.current_url 125 | data_html = driver.page_source 126 | 127 | try: 128 | last_page = extract_pages(data_html) 129 | self.reports.logs_report("info", data=f"Total known pages: {last_page}") 130 | self.console.log_print("info", msg=f"Total known pages: {last_page}") 131 | except IndexError: 132 | pass 133 | 134 | extract_data(data_html, reports=self.reports, metadata={"query": get_query(url), "num_page": 
set_page_num(num_page)}) 135 | 136 | await self.fetch_urls(session, url=target_url, params=None) 137 | 138 | except (GodorkException, GodorkTimeout) as err: 139 | self.reports.logs_report("error", data=f"Failed to bypass v2 protection. {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:{err}") 140 | self.console.log_print("error", msg=f"Failed to bypass v2 protection. {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:{err}") 141 | 142 | self.reports.logs_report("info", data=f"Retrying bypass of v2 protection (attempt: {retry_count+1}) on page {set_page_num(num_page)}") 143 | self.console.log_print("info", msg=f"Retrying bypass of v2 protection (attempt: {retry_count+1}) on page {set_page_num(num_page)}") 144 | 145 | await self.reuse_connection(session, url, num_page=num_page, retry_count=retry_count + 1) 146 | finally: 147 | driver.quit() 148 | 149 | async def fetch_urls(self, session, url, **kwargs): 150 | num_page = kwargs.get("params")["start"] if kwargs.get("params") is not None else get_page_num(url) 151 | i = 0 152 | 153 | while True: 154 | i += 1 155 | response, data_html = await self.requester.aioreqwest( 156 | session, 157 | method="GET", 158 | url=url, 159 | proxy=self.proxy, 160 | params=kwargs.get("params"), 161 | timeout=10, 162 | redirects=False 163 | ) 164 | 165 | self.reports.logs_report("debug", data=f"Initiating request to {str(response.url)}") 166 | self.console.debugging(self.debug, msg=f"Initiating request to {str(response.url)}") 167 | 168 | self.reports.logs_report("debug", data=f"Getting response status {response.status}") 169 | self.console.debugging(self.debug, msg=f"Getting response status {response.status}") 170 | 171 | if "Google Search" not in re.findall("(.*?)", data_html): 172 | 173 | if response.status == 200: 174 | try: 175 | last_page = extract_pages(data_html) 176 | self.reports.logs_report("info", data=f"Total known pages: {last_page}") 177 | self.console.log_print("info", msg=f"Total known pages: {last_page}") 178 | except IndexError: 179 | pass 180 | 
181 | extract_data(data_html, reports=self.reports, metadata={"query": get_query(response.url), "num_page": set_page_num(num_page)}) 182 | 183 | if 300 <= response.status <= 399 and "https://www.google.com/sorry/index" in response.headers.get("Location"): 184 | url_redirection = response.headers["Location"] 185 | 186 | self.reports.logs_report("debug", data=f"Getting the redirect URL {url_redirection}") 187 | self.console.debugging(self.debug, msg=f"Getting the redirect URL {url_redirection}") 188 | 189 | self.reports.logs_report("warning", data="Requests were blocked due to provider-side protection") 190 | self.console.log_print("warning", msg="Requests were blocked due to provider-side protection ") 191 | 192 | self.reports.logs_report("warning", data=f"reCAPTCHA detected on the page {set_page_num(num_page)}") 193 | self.console.debugging(self.debug, msg=f"reCAPTCHA detected on the page {set_page_num(num_page)}") 194 | 195 | await self.reuse_connection(session, url=url_redirection, num_page=num_page, retry_count=0) 196 | 197 | if 400 <= response.status <= 499: 198 | self.reports.logs_report("error", data=f"Failed to fetch request on page {set_page_num(num_page)} {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:Client error occurred") 199 | self.console.log_print("error", msg=f"Failed to fetch request on page {set_page_num(num_page)} {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:Client error occurred") 200 | 201 | if 500 <= response.status <= 599: 202 | self.reports.logs_report("error", data=f"Failed to fetch request on page {set_page_num(num_page)} {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:Server error occurred") 203 | self.console.log_print("error", msg=f"Failed to fetch request on page {set_page_num(num_page)} {Bgcolor.BLUE}reason{Bgcolor.DEFAULT}:Server error occurred") 204 | 205 | break 206 | 207 | if i >= self.retries: 208 | raise GodorkMaxRetries("The request failed after reaching the maximum number of retries attempts") 209 | 210 | else: 211 | 
print(f"\r{self.console.out_log_format('warning', msg=f'Unexpected provider response. Retrying (request: {i})')}", flush=True, end="\r") 212 | 213 | async def fetch_links(self, session, url): 214 | for query in self.dorks: 215 | self.reports.logs_report("info", data=f"{Bgcolor.BOLD}Starting enumeration for {query}{Bgcolor.DEFAULT}") 216 | self.console.log_print("info", msg=f"{Bgcolor.BOLD}Starting enumeration for {query}{Bgcolor.DEFAULT}") 217 | 218 | for i in range(0, 501, 10): 219 | self.console.debugging(self.debug, msg=f"Performing an HTTP GET request on page {set_page_num(i)}") 220 | self.reports.logs_report("info", data=f"Performing an HTTP GET request on page {set_page_num(i)}") 221 | 222 | try: 223 | await self.fetch_urls(session, url=url, params=self.params(query=query, page=i)) 224 | except GodorkMaxRetries as err: 225 | self.reports.logs_report("warning", data=err) 226 | self.console.log_print("warning", msg=err) 227 | 228 | self.reports.logs_report("info", data="Try using the `--no-headless` option to make changes") 229 | self.console.log_print("info", msg="Try using the `--no-headless` option to make changes") 230 | break 231 | except GodorkNoData as err: 232 | self.reports.logs_report("info", data=err) 233 | self.console.log_print("info", msg=err) 234 | break 235 | except Exception as err: 236 | self.reports.logs_report("error", data=err) 237 | self.console.log_print("error", msg=err) 238 | break 239 | 240 | async def run_with_async(self): 241 | print(self.console.text_format("info", msg="A high-speed scraper for collecting links and titles from Google search results")) 242 | print(self.console.text_format("warning", msg="Use with caution. 
You are responsible for your actions")) 243 | print(self.console.text_format("warning", msg="Developers assume no liability and are not responsible for any issue or damage")) 244 | 245 | time.sleep(1) 246 | 247 | async with ClientSession(connector=TCPConnector(ssl=False if self.proxy else True)) as session: 248 | try: 249 | await self.fetch_links(session, url=self.base_url) 250 | finally: 251 | await session.close() 252 | 253 | print(self.console.text_format("info", msg="Report saved to {}".format(self.reports.base_dir))) 254 | self.get_memory_usage() --------------------------------------------------------------------------------