├── README.md ├── clamav.sh ├── lookup_filehash.py ├── lookup_filehash_bulk.py ├── lookup_url.py ├── lookup_url_bulk.py └── submit_url.py /README.md: -------------------------------------------------------------------------------- 1 | # URLhaus 2 | URLhaus is an open platform for sharing malware distribution sites. This repository provides some sample python3 scripts on how to interact with the URLhaus bulk API. 3 | 4 | ## Obtain an Auth-Key 5 | In order to query the URLhaus API, you need to obtain an ```Auth-Key```. If you don't have an Auth-Key yet, you can get one at https://auth.abuse.ch/ for free. 6 | 7 | ## Report a malware URL 8 | This script lets you report a malware URL to URLhaus: 9 | 10 | ``` 11 | python3 submit_url.py http://evildomain1.tld/bad 12 | ``` 13 | 14 | ## Lookup a URL 15 | This script calls the URLhaus [URL information endpoint](https://urlhaus-api.abuse.ch/#urlinfo), looking up a particular URL in the URLhaus database: 16 | 17 | ``` 18 | python3 lookup_url.py http://77.73.133.113/lego/mine.exe 19 | ``` 20 | 21 | If you want to bulk lookups multiple URLs at the same time, you can save them to a file (one URL per line) and use the bulk lookup script, e.g.: 22 | 23 | ``` 24 | python3 lookup_url_bulk.py url-list.txt 25 | ``` 26 | 27 | ## Lookup a file hash (MD5 or SHA256) 28 | This script calls the URLhaus [payload information endpoint](https://urlhaus-api.abuse.ch/#payloadinfo), looking up a particular hash (MD5 or SHA256 hash) in the URLhaus database: 29 | 30 | ``` 31 | python3 lookup_filehash.py d72ba95c67364911636a82f711732eb67e235bb31b17928e832228e847d25890 32 | ``` 33 | 34 | If you want to bulk lookups multiple hashes at the same time, you can save them to a file (one MD5 or SHA256 hash per line) and use the bulk lookup script, e.g.: 35 | 36 | ``` 37 | python3 lookup_filehash_bulk.py hash-list.txt 38 | ``` 39 | 40 | ## ClamAV rules for detecting known bad URLs 41 | URLhaus publishes a ClamAV signature file, detecting malware distribution sites in e.g. emails. By running [clamav.sh](https://github.com/abusech/URLhaus/blob/master/clamav.sh) every minute as cronjob, you can make sure that the URLhaus signature DB stays up to date. 42 | 43 | ## API documentation 44 | The documentation for the URLhaus bulk API os available here: 45 | 46 | https://urlhaus-api.abuse.ch/ 47 | 48 | ## Feed of collected payloads 49 | URLhaus provides an hourly and daily batch of payload collected from malware distribution sites. The feeds are available here: 50 | 51 | Hourly feed: https://datalake.abuse.ch/urlhaus/hourly/ 52 | Daily feed: https://datalake.abuse.ch/urlhaus/daily/ 53 | -------------------------------------------------------------------------------- /clamav.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script updates Clamav definitions with data from URLhaus (https://urlhaus.abuse.ch/api/#clamav) 4 | # 5 | # !!! To receive best protection, you should setup a cronjob that executes this script every minute !!! 6 | # 7 | # Please set up the following variables to fit your system 8 | 9 | CLAMDIR="/var/lib/clamav" 10 | CLAMUSER="clamav" 11 | CLAMGROUP="clamav" 12 | 13 | # Don't change anything below this line 14 | 15 | RELOAD=0 16 | 17 | lockfile -r 0 /tmp/local.the.lock 2>/dev/null || exit 1 18 | 19 | rm -rf /tmp/urlhaus 20 | mkdir /tmp/urlhaus 21 | 22 | curl -s https://urlhaus.abuse.ch/downloads/urlhaus.ndb -o /tmp/urlhaus/urlhaus.ndb 23 | 24 | if [ $? -eq 0 ]; then 25 | clamscan --quiet -d /tmp/urlhaus /tmp/urlhaus 2>&1 >/dev/null 26 | if [ $? -eq 0 ]; then 27 | if [ -f "$CLAMDIR"/urlhaus.ndb ]; then 28 | MD5old=`md5sum "$CLAMDIR"/urlhaus.ndb` 29 | MD5new=`md5sum /tmp/urlhaus/urlhaus.ndb` 30 | if ! [ "$MD5old" = "$MD5new" ]; then 31 | # Updated file 32 | cp /tmp/urlhaus/urlhaus.ndb $CLAMDIR 33 | chown $CLAMUSER.$CLAMGROUP "$CLAMDIR"/urlhaus.ndb 34 | RELOAD=1 35 | fi 36 | else 37 | # Looks like it's the first run 38 | cp /tmp/urlhaus/urlhaus.ndb $CLAMDIR 39 | chown $CLAMUSER.$CLAMGROUP "$CLAMDIR"/urlhaus.ndb 40 | RELOAD=1 41 | fi 42 | fi 43 | fi 44 | 45 | if [ $RELOAD -eq 1 ]; then 46 | clamdscan --reload 47 | fi 48 | 49 | rm -rf /tmp/urlhaus 50 | rm -f /tmp/local.the.lock 51 | -------------------------------------------------------------------------------- /lookup_filehash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | import requests 4 | import json 5 | import re 6 | 7 | def query_urlhaus(auth_key, file_hash): 8 | # Validate file hash provided 9 | if re.search(r"^[A-Za-z0-9]{32}$", file_hash): 10 | hash_algo = 'md5_hash' 11 | elif re.search(r"^[A-Za-z0-9]{64}$", file_hash): 12 | hash_algo = 'sha256_hash' 13 | else: 14 | print("Invalid file hash provided") 15 | return 16 | # Construct the HTTP request 17 | data = { 18 | hash_algo : file_hash 19 | } 20 | # Set the Authentication header 21 | headers = { 22 | "Auth-Key" : auth_key 23 | } 24 | response = requests.post('https://urlhaus-api.abuse.ch/v1/payload/', data, headers=headers) 25 | # Parse the response from the API 26 | json_response = response.json() 27 | if json_response['query_status'] == 'ok': 28 | print(json.dumps(json_response, indent=4, sort_keys=False)) 29 | elif json_response['query_status'] == 'no_results': 30 | print("No results") 31 | else: 32 | print(json_response['query_status']) 33 | 34 | if len(sys.argv) > 2: 35 | query_urlhaus(sys.argv[1], sys.argv[2]) 36 | else: 37 | print("Looking up a file hash (MD5 or SHA256) on the URLhaus bulk API") 38 | print("Usage: python3 lookup_filehash.py ") 39 | print("Note: If you don't have an Auth-Key yet, you can obtain one at https://auth.abuse.ch/") 40 | -------------------------------------------------------------------------------- /lookup_filehash_bulk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | import urllib3 4 | import json 5 | import os 6 | import re 7 | 8 | # Prepare HTTPSConnectionPool 9 | pool = urllib3.HTTPSConnectionPool('urlhaus-api.abuse.ch', port=443, maxsize=10) 10 | 11 | def query_urlhaus(auth_key, file_hash): 12 | # Validate file hash provided 13 | if re.search(r"^[A-Za-z0-9]{32}$", file_hash): 14 | hash_algo = 'md5_hash' 15 | elif re.search(r"^[A-Za-z0-9]{64}$", file_hash): 16 | hash_algo = 'sha256_hash' 17 | else: 18 | print(f"[-] Illegal hash: {file_hash}") 19 | return 20 | # Construct the HTTP request 21 | data = { 22 | hash_algo : file_hash 23 | } 24 | # Set the Authentication header 25 | headers = { 26 | "Auth-Key" : auth_key 27 | } 28 | response = pool.request_encode_body("POST", "/v1/payload/", fields=data, encode_multipart=False, headers=headers) 29 | # Parse the response from the API 30 | response = response.data.decode("utf-8", "ignore") 31 | # Convert response to JSON 32 | json_response = json.loads(response) 33 | if json_response['query_status'] == 'ok': 34 | signature = json_response['signature'] 35 | print(f"[+] FOUND: {file_hash} {signature}") 36 | elif json_response['query_status'] == 'no_results': 37 | print(f"[-] Not found: {file_hash}") 38 | else: 39 | print(f"[-] Error: {file_hash}: {json_response['query_status']}") 40 | 41 | if len(sys.argv) > 2: 42 | if not os.path.isfile(sys.argv[2]): 43 | print("Input file not found") 44 | quit() 45 | file = open(sys.argv[2], 'r') 46 | hashes = file.readlines() 47 | for hash in hashes: 48 | query_urlhaus(sys.argv[1], hash.strip()) 49 | else: 50 | print("Takes a local file name as argument and looks up each file hash (MD5 or SHA256 hash) sequentialy on the URLhaus bulk API") 51 | print("Input file must contain one MD5 or SHA256 hash per line") 52 | print("Usage: python3 lookup_filehash_bulk.py ") 53 | print("Note: If you don't have an Auth-Key yet, you can obtain one at https://auth.abuse.ch/") 54 | -------------------------------------------------------------------------------- /lookup_url.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | import requests 4 | import json 5 | 6 | def query_urlhaus(auth_key, url): 7 | # Construct the HTTP request 8 | data = { 9 | 'url' : url 10 | } 11 | # Set the Authentication header 12 | headers = { 13 | "Auth-Key" : auth_key 14 | } 15 | response = requests.post('https://urlhaus-api.abuse.ch/v1/url/', data, headers=headers) 16 | # Parse the response from the API 17 | json_response = response.json() 18 | if json_response['query_status'] == 'ok': 19 | print(json.dumps(json_response, indent=4, sort_keys=False)) 20 | elif json_response['query_status'] == 'no_results': 21 | print("No results") 22 | else: 23 | print(json_response['query_status']) 24 | 25 | if len(sys.argv) > 2: 26 | query_urlhaus(sys.argv[1], sys.argv[2]) 27 | else: 28 | print("Looking up a URL on the URLhaus bulk API") 29 | print("Usage: python3 lookup_url.py ") 30 | print("Note: If you don't have an Auth-Key yet, you can obtain one at https://auth.abuse.ch/") 31 | -------------------------------------------------------------------------------- /lookup_url_bulk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | import urllib3 4 | import json 5 | import os 6 | 7 | # Prepare HTTPSConnectionPool 8 | pool = urllib3.HTTPSConnectionPool('urlhaus-api.abuse.ch', port=443, maxsize=10) 9 | 10 | def query_urlhaus(auth_key, url): 11 | # Construct the HTTP request 12 | data = { 13 | 'url' : url 14 | } 15 | # Set the Authentication header 16 | headers = { 17 | "Auth-Key" : auth_key 18 | } 19 | response = pool.request_encode_body("POST", "/v1/url/", fields=data, encode_multipart=False, headers=headers) 20 | # Parse the response from the API 21 | response = response.data.decode("utf-8", "ignore") 22 | # Convert response to JSON 23 | json_response = json.loads(response) 24 | if json_response['query_status'] == 'ok': 25 | print(f"[+] FOUND: {url}") 26 | elif json_response['query_status'] == 'no_results': 27 | print(f"[-] Not found: {url}") 28 | else: 29 | print(f"[-] Error: {url}: {json_response['query_status']}") 30 | 31 | if len(sys.argv) > 2: 32 | if not os.path.isfile(sys.argv[2]): 33 | print("Input file not found") 34 | quit() 35 | file = open(sys.argv[2], 'r') 36 | urls = file.readlines() 37 | for url in urls: 38 | query_urlhaus(sys.argv[1], url.strip()) 39 | else: 40 | print("Takes a local file name as argument and looks up each URL sequentialy on the URLhaus bulk API") 41 | print("Input file must contain one URL per line") 42 | print("Usage: python3 lookup_url_bulk.py ") 43 | print("Note: If you don't have an Auth-Key yet, you can obtain one at https://auth.abuse.ch/") 44 | -------------------------------------------------------------------------------- /submit_url.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import json 4 | import requests 5 | 6 | def report_urlhaus(auth_key, url): 7 | jsonData = { 8 | 'anonymous' : '0', 9 | 'submission' : [ 10 | { 11 | 'url' : url, 12 | 'threat' : 'malware_download', 13 | 'tags' : [ 14 | 'Emotet', 15 | 'doc' 16 | ] 17 | } 18 | ] 19 | } 20 | 21 | headers = { 22 | "Content-Type" : "application/json", 23 | "Auth-Key" : auth_key 24 | } 25 | r = requests.post('https://urlhaus.abuse.ch/api/', json=jsonData, timeout=15, headers=headers) 26 | print(r.content) 27 | 28 | if len(sys.argv) > 2: 29 | report_urlhaus(sys.argv[1], sys.argv[2]) 30 | else: 31 | print("Report a malware URL to URLhaus") 32 | print("Usage: python3 submit_url.py ") 33 | print("Note: If you don't have an Auth-Key yet, you can obtain one at https://auth.abuse.ch/") 34 | --------------------------------------------------------------------------------