├── README.md
└── otx-url.py

/README.md:
--------------------------------------------------------------------------------
# OTX AlienVault URL Scraper

This Python script fetches URLs associated with given domains from the **OTX (AlienVault Open Threat Exchange)** API and saves the collected URLs to a file. Results are fetched page by page, and the script automatically walks through each page until no more URLs are found.

## Features

- Fetches URLs associated with multiple domains from the OTX API.
- Saves the collected URLs to a user-defined output file.

## Requirements

- Python 3.x
- **requests** library (can be installed via `pip install requests`)

## Usage

1. **Clone the repository:**

   ```bash
   git clone https://github.com/killua889/otx-url.git
   cd otx-url
   chmod +x otx-url.py
   ```

## Example

```bash
python3 otx-url.py -l domains.txt -o output_urls.txt
```

`-l` Input file with a list of domains (one domain per line).

`-o` Output file where the collected URLs will be saved (default: `collected_urls.txt`).

Example `domains.txt`:

```
example.com
example3.com
s.example.com
```
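## How it works

Under the hood, the script pages through the OTX `url_list` endpoint for each domain, reading the `url` field of every returned entry and stopping once a page comes back empty. As a rough sketch (for illustration only; the exact response fields are defined by the OTX API), a single page can be fetched manually like this:

```bash
# Page 1 of the URL list for example.com (the same endpoint the script requests)
curl -s "https://otx.alienvault.com/api/v1/indicators/domain/example.com/url_list?limit=100&page=1"
```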
--------------------------------------------------------------------------------
/otx-url.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import requests
import argparse
import os
import sys
import time

def print_color(text, color_code):
    """Print text wrapped in an ANSI color escape sequence."""
    print(f"\033[{color_code}m{text}\033[0m")

def fetch_urls(domain):
    """Collect every URL OTX reports for a domain, paging until results run out."""
    page = 1
    all_urls = []

    print_color(f"\n[+] Fetching URLs for domain: {domain}", "96")  # Cyan
    while True:
        url = f"https://otx.alienvault.com/api/v1/indicators/domain/{domain}/url_list?limit=100&page={page}"
        try:
            response = requests.get(url, timeout=10)
        except requests.exceptions.RequestException as e:
            print_color(f"[-] Request error for {domain} page {page}: {e}", "91")
            break

        if response.status_code != 200:
            print_color(f"[-] Failed to fetch page {page} for {domain} (Status: {response.status_code})", "91")
            break

        data = response.json()
        url_list = data.get("url_list", [])

        if not url_list:
            print_color(f"[!] No more URLs found on page {page}; stopping.", "93")  # Yellow
            break

        for item in url_list:
            extracted_url = item.get("url")
            if extracted_url:
                print(extracted_url)
                all_urls.append(extracted_url)

        print_color(f"[+] Page {page}: Found {len(url_list)} URLs", "92")  # Green
        page += 1
        time.sleep(1)  # Be nice to the API

    return all_urls

def main():
    parser = argparse.ArgumentParser(description="OTX AlienVault URL scraper.")
    parser.add_argument("-l", "--list", required=True, help="Input file with list of domains")
    parser.add_argument("-o", "--output", default="collected_urls.txt", help="Output file name (default: collected_urls.txt)")
    args = parser.parse_args()

    domains_file = args.list
    output_file = args.output

    if not os.path.isfile(domains_file):
        print_color(f"[-] File not found: {domains_file}", "91")
        sys.exit(1)

    # One domain per line; skip blank lines.
    with open(domains_file, "r") as f:
        domains = [line.strip() for line in f if line.strip()]

    total = 0
    with open(output_file, "w") as outfile:
        for domain in domains:
            urls = fetch_urls(domain)
            for url in urls:
                outfile.write(url + "\n")
            total += len(urls)

    print_color(f"\n[+] Done. Total URLs collected: {total}", "92")
    print_color(f"[+] Saved to: {output_file}", "94")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------