├── requirements.txt
├── LICENSE
├── README.md
└── tor_dork_search.py

/requirements.txt:
--------------------------------------------------------------------------------
requests>=2.25.1
tqdm>=4.60.0
PySocks>=1.7.1

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 Profanatica

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Tor-Dork-Search

An OSINT tool for searching DuckDuckGo via Tor using search dorks.

## Features

- Anonymous searching through the Tor network
- Multiple concurrent searches
- Multiple link extraction methods
- Automatic retries with randomized delays
- CAPTCHA detection
- Progress tracking with tqdm

## Requirements

- Python 3.6+
- Tor service running locally (SOCKS proxy on 127.0.0.1:9050)
- Required Python packages (install via `requirements.txt`)

## Installation

1. Install and start Tor:

        # For Debian/Ubuntu
        sudo apt install tor
        sudo systemctl start tor

2. Clone this repository:

        git clone https://github.com/Profanatic/tor-dork-search.git
        cd tor-dork-search

3. Install dependencies:

        pip3 install -r requirements.txt

## Usage

1. Create a text file with your search dorks, one per line (see the example dorks file at the end of this README).

2. Run the script:

        python3 tor_dork_search.py -d dorks.txt -o results.txt

## Example

    python3 tor_dork_search.py -d dorks.txt -o results.txt -j 5

## Troubleshooting

If you get zero results:

- Verify Tor is running: `systemctl status tor`
- Test the Tor connection: `torsocks curl https://check.torproject.org/api/ip`
- Try simpler dorks first
- Increase the delays between requests

## Disclaimer

This tool is for educational and legitimate research purposes only. The developers are not responsible for any misuse of this software.
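
## Example dorks file

The dorks file is plain text with one search query per line. The queries below are purely illustrative placeholders (any DuckDuckGo-compatible search syntax works); substitute your own:

    site:example.com filetype:pdf
    intitle:"index of" "backup"
    inurl:login site:example.org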
--------------------------------------------------------------------------------
/tor_dork_search.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Tor Dork Search - An OSINT tool for searching DuckDuckGo via Tor using dorks
"""

import requests
import random
import time
import argparse
import os
from tqdm import tqdm
from urllib.parse import unquote
import socks
import socket
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Set, Optional
import re

# Constants
TOR_PROXY = "socks5h://127.0.0.1:9050"
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
MAX_RETRIES = 3
MIN_DELAY = 5
MAX_DELAY = 10
MAX_WORKERS = 3
REQUEST_TIMEOUT = 30


def check_tor_connection() -> bool:
    """Check if the Tor connection is working properly."""
    try:
        session = get_session()
        response = session.get("https://check.torproject.org/api/ip", timeout=REQUEST_TIMEOUT)
        return response.json().get("IsTor", False) is True
    except Exception:
        return False


def configure_tor_proxy() -> None:
    """Configure the system to use the Tor SOCKS5 proxy."""
    try:
        socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 9050)
        socket.socket = socks.socksocket
    except Exception as e:
        raise ConnectionError(f"Failed to configure Tor proxy: {e}")


def get_session() -> requests.Session:
    """Create and configure a requests session that routes through Tor."""
    session = requests.Session()
    session.proxies = {'http': TOR_PROXY, 'https': TOR_PROXY}
    session.headers.update({
        'User-Agent': DEFAULT_USER_AGENT,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    })
    return session


def search_duckduckgo(query: str, session: requests.Session) -> Optional[str]:
    """Search DuckDuckGo using Tor and return the raw HTML, or None on failure."""
    url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"

    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()

        # Check for CAPTCHA page
        if "https://duckduckgo.com/sorry" in response.url:
            tqdm.write(f"⚠️ CAPTCHA encountered for query: {query}")
            return None

        return response.text
    except requests.exceptions.RequestException as e:
        tqdm.write(f"⚠️ Error searching '{query[:50]}...': {str(e)}")
        return None


def extract_links(page_content: str) -> List[str]:
    """Extract unique HTTP/HTTPS links from page content."""
    links = set()

    if not page_content:
        return []

    # Multiple extraction methods to handle different HTML structures
    # Method 1: Standard result links
    for match in re.finditer(r'<a[^>]*class="result__a"[^>]*href="(.*?)"', page_content):
        url = unquote(match.group(1))
        if url.startswith('http') and 'duckduckgo.com' not in url:
            links.add(url)

    # Method 2: Redirect URLs
    for match in re.finditer(r'uddg=(.*?)&', page_content):
        url = unquote(match.group(1))
        if url.startswith('http'):
            links.add(url)

    # Method 3: Result URL class
    for match in re.finditer(r'class="result__url".*?href="(.*?)"', page_content):
        url = unquote(match.group(1))
        if url.startswith('http'):
            links.add(url)

    return sorted(links)
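
# Usage sketch for extract_links(), assuming DuckDuckGo's result markup looks
# roughly like the fragment below (the exact markup is an assumption and may
# change over time):
#
#     sample = '<a rel="nofollow" class="result__a" href="https://example.com/page">Example</a>'
#     extract_links(sample)  # -> ['https://example.com/page']
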
def process_dork(dork: str, session: requests.Session) -> Set[str]:
    """Process a single dork with retries and randomized delays."""
    links = set()

    for attempt in range(MAX_RETRIES):
        try:
            content = search_duckduckgo(dork, session)
            if content:
                new_links = extract_links(content)
                if new_links:
                    links.update(new_links)
                    break

            # Back off before the next attempt
            time.sleep(random.uniform(MIN_DELAY, MAX_DELAY))
        except Exception as e:
            tqdm.write(f"⚠️ Error processing '{dork[:50]}...': {e}")
            time.sleep(random.uniform(MIN_DELAY * 2, MAX_DELAY * 2))

    if not links:
        tqdm.write(f"❌ Failed after {MAX_RETRIES} attempts: {dork[:50]}...")
    else:
        tqdm.write(f"✔️ Found {len(links)} links for: {dork[:50]}...")

    return links


def main():
    """Main function to parse arguments and execute the search."""
    parser = argparse.ArgumentParser(description='OSINT Search with Tor using Dorks')
    parser.add_argument('-d', '--dorks', required=True,
                        help='Path to dorks file (.txt)')
    parser.add_argument('-o', '--output', default="tor_dork_results.txt",
                        help='Output file path')
    parser.add_argument('-j', '--workers', type=int, default=MAX_WORKERS,
                        help='Number of concurrent workers')
    args = parser.parse_args()

    # Validate input file
    if not os.path.isfile(args.dorks):
        print(f"❌ Dorks file not found: {args.dorks}")
        return

    # Read dorks
    try:
        with open(args.dorks, 'r', encoding='utf-8') as f:
            dorks = [line.strip() for line in f if line.strip()]
    except Exception as e:
        print(f"❌ Error reading dorks file: {e}")
        return

    if not dorks:
        print("❌ No dorks found in the file.")
        return

    # Configure Tor and verify the connection
    try:
        configure_tor_proxy()
        if not check_tor_connection():
            print("❌ Tor connection check failed. Is Tor running?")
            return
        print("✔️ Tor connection verified")
    except ConnectionError as e:
        print(f"❌ {e}")
        return

    # Process dorks
    all_links = set()
    session = get_session()

    print(f"\n🔍 Starting search for {len(dorks)} dorks with {args.workers} workers...\n")

    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        futures = {
            executor.submit(process_dork, dork, session): dork
            for dork in dorks
        }

        for future in tqdm(as_completed(futures), total=len(dorks), desc="Processing"):
            try:
                links = future.result()
                all_links.update(links)
            except Exception as e:
                tqdm.write(f"⚠️ Unexpected error: {e}")

    # Save results
    try:
        with open(args.output, 'w', encoding='utf-8') as f_out:
            for link in sorted(all_links):
                f_out.write(f"{link}\n")

        print(f"\n✅ Search completed. Found {len(all_links)} unique links")
        print(f"📄 Results saved to: {os.path.abspath(args.output)}")

        if not all_links:
            print("\nℹ️ No links were found. Possible reasons:")
            print("- Tor connection issues")
            print("- DuckDuckGo is blocking requests")
            print("- The dorks didn't match any results")
            print("- Try increasing delays between requests")
    except Exception as e:
        print(f"❌ Failed to save results: {e}")


if __name__ == "__main__":
    main()

--------------------------------------------------------------------------------