├── README.md
└── otx-url.py

/README.md:
--------------------------------------------------------------------------------
# OTX AlienVault URL Scraper

This Python script fetches URLs associated with given domains from the **OTX (AlienVault Open Threat Exchange)** API and saves the collected URLs to a file. Results are fetched page by page, and the script automatically walks through each page until no more URLs are found.

## Features

- Fetches URLs associated with multiple domains from the OTX API.
- Saves the collected URLs to a user-defined output file.

## Requirements

- Python 3.x
- **requests** library (can be installed via `pip install requests`)

## Usage

1. **Clone the repository:**

   ```bash
   git clone https://github.com/killua889/otx-url.git
   cd otx-url
   chmod +x otx-url.py
   ```

## Example

```bash
python3 otx-url.py -l domains.txt -o output_urls.txt
```

`-l` Input file with a list of domains (one domain per line).

`-o` Output file where the collected URLs will be saved (default: `collected_urls.txt`).

Example `domains.txt`:

```
example.com
example3.com
s.example.com
```
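## How it works

Under the hood, the script pages through the OTX `url_list` endpoint for each domain, reading the `url` field of every returned entry and stopping once a page comes back empty. As a rough sketch (for illustration only; the exact response fields are defined by the OTX API), a single page can be fetched manually like this:

```bash
# Page 1 of the URL list for example.com (the same endpoint the script requests)
curl -s "https://otx.alienvault.com/api/v1/indicators/domain/example.com/url_list?limit=100&page=1"
```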
--------------------------------------------------------------------------------
/otx-url.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
import requests
import argparse
import os
import sys
import time

def print_color(text, color_code):
    """Print text wrapped in an ANSI color escape sequence."""
    print(f"\033[{color_code}m{text}\033[0m")

def fetch_urls(domain):
    """Collect every URL OTX reports for a domain, paging until results run out."""
    page = 1
    all_urls = []

    print_color(f"\n[+] Fetching URLs for domain: {domain}", "96")  # Cyan
    while True:
        url = f"https://otx.alienvault.com/api/v1/indicators/domain/{domain}/url_list?limit=100&page={page}"
        try:
            response = requests.get(url, timeout=10)
        except requests.exceptions.RequestException as e:
            print_color(f"[-] Request error for {domain} page {page}: {e}", "91")
            break

        if response.status_code != 200:
            print_color(f"[-] Failed to fetch page {page} for {domain} (Status: {response.status_code})", "91")
            break

        data = response.json()
        url_list = data.get("url_list", [])

        if not url_list:
            print_color(f"[!] No more URLs found on page {page}; stopping.", "93")  # Yellow
            break

        for item in url_list:
            extracted_url = item.get("url")
            if extracted_url:
                print(extracted_url)
                all_urls.append(extracted_url)

        print_color(f"[+] Page {page}: Found {len(url_list)} URLs", "92")  # Green
        page += 1
        time.sleep(1)  # Be nice to the API

    return all_urls

def main():
    parser = argparse.ArgumentParser(description="OTX AlienVault URL scraper.")
    parser.add_argument("-l", "--list", required=True, help="Input file with list of domains")
    parser.add_argument("-o", "--output", default="collected_urls.txt", help="Output file name (default: collected_urls.txt)")
    args = parser.parse_args()

    domains_file = args.list
    output_file = args.output

    if not os.path.isfile(domains_file):
        print_color(f"[-] File not found: {domains_file}", "91")
        sys.exit(1)

    # One domain per line; skip blank lines.
    with open(domains_file, "r") as f:
        domains = [line.strip() for line in f if line.strip()]

    total = 0
    with open(output_file, "w") as outfile:
        for domain in domains:
            urls = fetch_urls(domain)
            for url in urls:
                outfile.write(url + "\n")
            total += len(urls)

    print_color(f"\n[+] Done. Total URLs collected: {total}", "92")
    print_color(f"[+] Saved to: {output_file}", "94")

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------