├── .nvmrc
├── .gitignore
├── wrangler.toml
├── package.json
├── src
│   ├── exclude-url.txt
│   ├── gitlab_status.sh
│   ├── build.js
│   ├── exclude.txt
│   ├── ids.js
│   ├── clean_url.js
│   └── script.sh
├── LICENSE
├── .gitlab-ci.yml
├── .github
│   └── workflows
│       └── pages.yml
├── LICENSE-CC0.md
└── README.md
/.nvmrc: -------------------------------------------------------------------------------- 1 | lts/* 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | .vscode/ 3 | public/ 4 | node_modules/ 5 | -------------------------------------------------------------------------------- /wrangler.toml: -------------------------------------------------------------------------------- 1 | name = "urlhaus-filter" 2 | pages_build_output_dir = "public" 3 | 4 | [vars] 5 | ASDF_NODEJS_LEGACY_FILE_DYNAMIC_STRATEGY = "latest_available" 6 | 7 | [env.production.vars] 8 | ASDF_NODEJS_LEGACY_FILE_DYNAMIC_STRATEGY = "latest_available" 9 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "urlhaus-filter", 3 | "private": true, 4 | "scripts": { 5 | "build": "node src/build.js" 6 | }, 7 | "dependencies": { 8 | "unzipper": "^0.12.3" 9 | }, 10 | "engines": { 11 | "node": ">= 18.12.0" 12 | }, 13 | "type": "module" 14 | } 15 | -------------------------------------------------------------------------------- /src/exclude-url.txt: -------------------------------------------------------------------------------- 1 | # URL exclusion list 2 | # Domains/URLs listed here will be excluded from domain-based and URL-based filters 3 | # Any entry with slash (/) will not be applied to domain-based filters 4 | # Include only top 1m (sub)domains that do not host user content 5 | github.githubassets.com 6 | -------------------------------------------------------------------------------- /src/gitlab_status.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ARTIFACT_STATUS=$(curl -sSIL "https://gitlab.com/malware-filter/urlhaus-filter/-/jobs/artifacts/main/download?job=pages" | grep -F "HTTP/2 200") 4 | PIPELINE_STATUS=$(curl -sSL "https://gitlab.com/malware-filter/urlhaus-filter/badges/main/pipeline.svg" | grep -F "failed") 5 | GITLAB_STATUS="up" 6 | 7 | if [ -z "$ARTIFACT_STATUS" ] || [ -n "$PIPELINE_STATUS" ]; then 8 | GITLAB_STATUS="down" 9 | fi 10 | 11 | echo "GITLAB_STATUS=$GITLAB_STATUS" >> "$GITHUB_ENV" 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ming Di Leom 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: node:lts-alpine 2 | 3 | include: 4 | - template: Security/Secret-Detection.gitlab-ci.yml 5 | 6 | # Only run pipeline when scheduled or "Run pipeline" in the main branch 7 | workflow: 8 | rules: 9 | - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && ($CI_PIPELINE_SOURCE == "schedule" || $CI_PIPELINE_SOURCE == "web")' 10 | 11 | build_job: 12 | stage: build 13 | 14 | before_script: 15 | - apk update && apk add brotli curl file grep jq zstd 16 | 17 | script: 18 | - sh src/script.sh 19 | - find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec gzip -f -k -9 {} \; 20 | - find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec brotli -f -k -9 {} \; 21 | - find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec zstd -f -k -9 {} \; 22 | 23 | artifacts: 24 | paths: 25 | - tmp 26 | - public 27 | expire_in: 1 week 28 | 29 | pages: 30 | stage: deploy 31 | 32 | dependencies: 33 | - build_job 34 | 35 | script: 36 | - echo 37 | 38 | artifacts: 39 | paths: 40 | - public 41 | expire_in: 1 week 42 | 43 | cloudflare: 44 | stage: deploy 45 | 46 | before_script: 47 | - apk update && apk add curl 48 | 49 | script: 50 | - curl -X POST "https://api.cloudflare.com/client/v4/pages/webhooks/deploy_hooks/$CLOUDFLARE_BUILD_HOOK" 51 | 52 | rules: 53 | - if: $CLOUDFLARE_BUILD_HOOK 54 | 55 | netlify: 56 | stage: deploy 57 | 58 | dependencies: 59 | - build_job 60 | 61 | before_script: 62 | - npm install netlify-cli -g 63 | - netlify --telemetry-disable 64 | 65 | script: 66 | - netlify deploy --dir=public --prod 67 | 68 | cache: 69 | paths: 70 | - node_modules/ 71 | 72 | rules: 73 | - if: $NETLIFY_SITE_ID 74 | -------------------------------------------------------------------------------- /src/build.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | // for deployment outside of GitLab CI, e.g. 
Cloudflare Pages and Netlify 4 | 5 | import { Extract } from 'unzipper' 6 | import { dirname, join } from 'node:path' 7 | import { mkdir } from 'node:fs/promises' 8 | import { pipeline } from 'node:stream/promises' 9 | import { fileURLToPath } from 'node:url' 10 | import { Readable } from 'node:stream' 11 | 12 | const __dirname = dirname(fileURLToPath(import.meta.url)) 13 | const rootPath = join(__dirname, '..') 14 | const publicPath = join(rootPath, 'public') 15 | const artifactsUrl = 'https://gitlab.com/malware-filter/urlhaus-filter/-/jobs/artifacts/main/download?job=pages' 16 | const pipelineUrl = 'https://gitlab.com/malware-filter/urlhaus-filter/badges/main/pipeline.svg' 17 | const ghMirror = 'https://nightly.link/curbengh/urlhaus-filter/workflows/pages/main/public.zip' 18 | 19 | const pipelineStatus = async (url) => { 20 | console.log(`Checking pipeline from "${url}"`) 21 | try { 22 | const svg = await (await fetch(url)).text() 23 | if (svg.includes('failed')) throw new Error('last gitlab pipeline failed') 24 | } catch ({ message }) { 25 | throw new Error(message) 26 | } 27 | } 28 | 29 | const f = async () => { 30 | console.log(`Downloading artifacts.zip from "${artifactsUrl}"`) 31 | try { 32 | await pipeline( 33 | Readable.fromWeb((await fetch(artifactsUrl)).body), 34 | Extract({ path: rootPath }) 35 | ) 36 | await pipelineStatus(pipelineUrl) 37 | } catch ({ message }) { 38 | console.error(JSON.stringify({ 39 | error: message, 40 | link: artifactsUrl 41 | })) 42 | 43 | console.log(`Downloading artifacts.zip from "${ghMirror}"`) 44 | 45 | await mkdir(publicPath, { recursive: true }) 46 | 47 | try { 48 | await pipeline( 49 | Readable.fromWeb((await fetch(ghMirror)).body), 50 | Extract({ path: publicPath }) 51 | ) 52 | } catch ({ message }) { 53 | throw new Error(JSON.stringify({ 54 | error: message, 55 | link: ghMirror 56 | })) 57 | } 58 | } 59 | } 60 | 61 | f() 62 | -------------------------------------------------------------------------------- /.github/workflows/pages.yml: -------------------------------------------------------------------------------- 1 | name: Pages 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0,12 * * *" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | pages: 10 | runs-on: ubuntu-latest 11 | container: node:lts-alpine 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Install Dependencies 15 | run: | 16 | apk update 17 | apk add brotli curl file git grep jq zstd 18 | - name: Build 19 | env: 20 | CF_API: ${{ secrets.CF_API }} 21 | run: sh ./src/script.sh 22 | - name: Compress 23 | run: | 24 | find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec gzip -f -k -9 {} \; 25 | find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec brotli -f -k -9 {} \; 26 | find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec zstd -f -k -9 {} \; 27 | - name: Deploy 28 | uses: peaceiris/actions-gh-pages@v4 29 | with: 30 | github_token: ${{ secrets.GITHUB_TOKEN }} 31 | publish_dir: ./public 32 | force_orphan: true 33 | - name: "Upload Public Folder" 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: public 37 | path: ./public 38 | retention-days: 30 39 | - name: "Upload Tmp Folder" 40 | uses: actions/upload-artifact@v4 41 | with: 42 | name: tmp 43 | path: ./tmp 44 | retention-days: 30 45 | - name: Check GitLab Status 46 | env: 47 | GITHUB_ENV: ${{ env.GITHUB_ENV }} 48 | run: sh ./src/gitlab_status.sh 49 | - name: Cloudflare Pages 50 | env: 51 | CLOUDFLARE_BUILD_HOOK: ${{ secrets.CLOUDFLARE_BUILD_HOOK }} 52 | if: ${{ 
env.CLOUDFLARE_BUILD_HOOK != 0 && env.GITLAB_STATUS == 'down' }} 53 | run: curl -X POST "https://api.cloudflare.com/client/v4/pages/webhooks/deploy_hooks/${{ env.CLOUDFLARE_BUILD_HOOK }}" 54 | - name: Netlify 55 | env: 56 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 57 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 58 | if: ${{ env.NETLIFY_SITE_ID != 0 && env.GITLAB_STATUS == 'down' }} 59 | run: | 60 | npm install netlify-cli -g 61 | netlify --telemetry-disable 62 | netlify deploy --dir=public --prod 63 | -------------------------------------------------------------------------------- /src/exclude.txt: -------------------------------------------------------------------------------- 1 | # Domain exclusion list 2 | # Domains listed here will be excluded from domain-based filters only, not URL-based filters 3 | void.cat 4 | pdesaa.cimaa.pt 5 | users.telenet.be 6 | u.teknik.io 7 | digitalschnitt.de 8 | cd.textfiles.com 9 | s3.amazonaws.com 10 | s3.us-east-2.amazonaws.com 11 | s3.us-east-1.amazonaws.com 12 | s3.us-west-1.amazonaws.com 13 | s3.us-west-2.amazonaws.com 14 | s3.af-south-1.amazonaws.com 15 | s3.ap-east-1.amazonaws.com 16 | s3.ap-south-2.amazonaws.com 17 | s3.ap-southeast-3.amazonaws.com 18 | s3.ap-southeast-4.amazonaws.com 19 | s3.ap-south-1.amazonaws.com 20 | s3.ap-northeast-3.amazonaws.com 21 | s3.ap-northeast-2.amazonaws.com 22 | s3.ap-southeast-1.amazonaws.com 23 | s3.ap-southeast-2.amazonaws.com 24 | s3.ap-northeast-1.amazonaws.com 25 | s3.ca-central-1.amazonaws.com 26 | s3.eu-central-1.amazonaws.com 27 | s3.eu-west-1.amazonaws.com 28 | s3.eu-west-2.amazonaws.com 29 | s3.eu-south-1.amazonaws.com 30 | s3.eu-west-3.amazonaws.com 31 | s3.eu-south-2.amazonaws.com 32 | s3.eu-north-1.amazonaws.com 33 | s3.eu-central-2.amazonaws.com 34 | s3.me-south-1.amazonaws.com 35 | s3.me-central-1.amazonaws.com 36 | s3.sa-east-1.amazonaws.com 37 | s3.us-gov-east-1.amazonaws.com 38 | s3.us-gov-west-1.amazonaws.com 39 | s3.cn-north-1.amazonaws.com.cn 40 | s3.cn-northwest-1.amazonaws.com.cn 41 | s3-us-east-2.amazonaws.com 42 | s3-us-east-1.amazonaws.com 43 | s3-us-west-1.amazonaws.com 44 | s3-us-west-2.amazonaws.com 45 | s3-af-south-1.amazonaws.com 46 | s3-ap-east-1.amazonaws.com 47 | s3-ap-south-1.amazonaws.com 48 | s3-ap-northeast-3.amazonaws.com 49 | s3-ap-northeast-2.amazonaws.com 50 | s3-ap-southeast-1.amazonaws.com 51 | s3-ap-southeast-2.amazonaws.com 52 | s3-ap-northeast-1.amazonaws.com 53 | s3-ca-central-1.amazonaws.com 54 | s3-cn-north-1.amazonaws.com.cn 55 | s3-cn-northwest-1.amazonaws.com.cn 56 | s3-eu-central-1.amazonaws.com 57 | s3-eu-west-1.amazonaws.com 58 | s3-eu-west-2.amazonaws.com 59 | s3-eu-south-1.amazonaws.com 60 | s3-eu-west-3.amazonaws.com 61 | s3-eu-north-1.amazonaws.com 62 | s3-sa-east-1.amazonaws.com 63 | s3-me-south-1.amazonaws.com 64 | s3-us-gov-east-1.amazonaws.com 65 | s3-us-gov-west-1.amazonaws.com 66 | dl.packetstormsecurity.net 67 | cfs5.tistory.com 68 | litter.catbox.moe 69 | link.storjshare.io 70 | storage.bunnycdn.com 71 | eu2.contabostorage.com 72 | sin1.contabostorage.com 73 | usc1.contabostorage.com 74 | res.cloudinary.com 75 | landley.net 76 | r2.e-z.host 77 | a.uguu.se 78 | f.uguu.se 79 | i.uguu.se 80 | x0.at 81 | -------------------------------------------------------------------------------- /src/ids.js: -------------------------------------------------------------------------------- 1 | import { createWriteStream } from 'node:fs' 2 | import { open } from 'node:fs/promises' 3 | 4 | const domains = await open('malware-domains-online.txt') 5 | 
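// Note (added for reference): both inputs below are produced in tmp/ by src/script.sh
// before this script runs, one entry per line:
// - malware-domains-online.txt: online malicious hosts (popular top-1M domains excluded)
// - malware-url-top-domains-raw-online.txt: online malicious URLs hosted on popular domains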
const urls = await open('malware-url-top-domains-raw-online.txt') 6 | 7 | const snort2 = createWriteStream('../public/urlhaus-filter-snort2-online.rules', { 8 | encoding: 'utf8', 9 | flags: 'a' 10 | }) 11 | const snort3 = createWriteStream('../public/urlhaus-filter-snort3-online.rules', { 12 | encoding: 'utf8', 13 | flags: 'a' 14 | }) 15 | const suricata = createWriteStream('../public/urlhaus-filter-suricata-online.rules', { 16 | encoding: 'utf8', 17 | flags: 'a' 18 | }) 19 | const suricataSni = createWriteStream('../public/urlhaus-filter-suricata-sni-online.rules', { 20 | encoding: 'utf8', 21 | flags: 'a' 22 | }) 23 | const splunk = createWriteStream('../public/urlhaus-filter-splunk-online.csv', { 24 | encoding: 'utf8', 25 | flags: 'a' 26 | }) 27 | 28 | let sid = 100000001 29 | 30 | for await (const domain of domains.readLines()) { 31 | snort2.write(`alert tcp $HOME_NET any -> $EXTERNAL_NET [80,443] (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; content:"GET"; http_method; content:"${domain}"; content:"Host"; http_header; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 32 | snort3.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; http_header:field host; content:"${domain}",nocase; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 33 | suricata.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; http.method; content:"GET"; http.host; content:"${domain}"; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 34 | suricataSni.write(`alert tls $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; tls.sni; bsize:${domain.length}; content:"${domain}"; fast_pattern; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 35 | splunk.write(`"${domain}","","urlhaus-filter malicious website detected","${process.env.CURRENT_TIME}"\n`) 36 | 37 | sid++ 38 | } 39 | 40 | suricataSni.close() 41 | 42 | for await (const line of urls.readLines()) { 43 | if (!URL.canParse(`http://${line}`)) { 44 | console.error(`Invalid URL: ${line}`) 45 | continue 46 | } 47 | 48 | const url = new URL(`http://${line}`) 49 | const { hostname, pathname, search } = url 50 | const pathEscape = `${pathname}${search}`.replaceAll(';', '\\;') 51 | const path = pathname + search 52 | 53 | snort2.write(`alert tcp $HOME_NET any -> $EXTERNAL_NET [80,443] (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; content:"GET"; http_method; content:"${pathEscape.substring(0, 2048)}"; http_uri; nocase; content:"${hostname}"; content:"Host"; http_header; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 54 | snort3.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; http_header:field host; content:"${hostname}",nocase; http_uri; content:"${pathEscape}",nocase; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 55 | suricata.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; http.method; content:"GET"; http.uri; content:"${pathEscape}"; endswith; nocase; http.host; content:"${hostname}"; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 56 | splunk.write(`"${hostname}","${path}","urlhaus-filter malicious website detected","${process.env.CURRENT_TIME}"\n`) 57 | 58 | sid++ 59 | } 60 | 61 | snort2.close() 62 | snort3.close() 63 | suricata.close() 64 | splunk.close() 65 |
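// Note (added for reference): the .rules/.csv outputs are written without a header here;
// src/script.sh prepends the list header afterwards via `sed -i "1i $COMMENT_ONLINE"`.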
-------------------------------------------------------------------------------- /src/clean_url.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | import { createInterface } from 'node:readline' 4 | 5 | const cleanHost = (hostname) => { 6 | return hostname 7 | // Remove invalid protocol, see #32 8 | .replace(/^(https?:\/\/)(?:ttps:\/\/|https:\/|http\/)/, '$1') 9 | .replace(/^(https?:\/\/)?www\./, '$1') 10 | } 11 | 12 | // nodejs does not percent-encode ^ yet 13 | // https://github.com/nodejs/node/issues/57313 14 | // Applies to path, exclude query string 15 | const caretPath = (pathname) => { 16 | if (!pathname.includes('?')) return pathname.replaceAll('^', '%5E') 17 | 18 | const pathArray = pathname.split('?') 19 | const path = pathArray[0].replaceAll('^', '%5E') 20 | const search = pathArray.slice(1).join('?') 21 | 22 | return `${path}?${search}` 23 | } 24 | 25 | const safeLinks = [ 26 | 'safelinks\\.protection\\.outlook\\.com', 27 | '\\.protection\\.sophos\\.com', 28 | 'linkprotect\\.cudasvc\\.com', 29 | 'ctp\\.trendmicro\\.com', 30 | 'urlsand\\.esvalabs\\.com' 31 | ] 32 | 33 | const deSafelink = (urlStr) => { 34 | let url = new URL(urlStr) 35 | 36 | // O365 Safelinks & Trendmicro 37 | if (url.hostname.endsWith('safelinks.protection.outlook.com') || url.hostname.endsWith('ctp.trendmicro.com')) { 38 | url = new URL(url.searchParams.get('url')) 39 | } 40 | 41 | // Sophos 42 | if (url.hostname.endsWith('.protection.sophos.com')) { 43 | url = new URL(`http://${url.searchParams.get('d')}`) 44 | } 45 | 46 | // Barracuda 47 | if (url.hostname.endsWith('linkprotect.cudasvc.com')) { 48 | url = new URL(url.searchParams.get('a')) 49 | } 50 | 51 | // ShopMy & Disqus 52 | if ((url.hostname === 'api.shopmy.us' && url.pathname === '/api/redirect_click') || url.hostname === 'disq.us') { 53 | url = new URL(url.searchParams.get('url')) 54 | } 55 | 56 | // VKontakte 57 | if ((url.hostname === 'vk.com' || url.hostname === 'vkontakte.ru') && url.pathname === '/away.php') { 58 | url = new URL(url.searchParams.get('to')) 59 | } 60 | 61 | // WhatsApp, Esvalabs 62 | if ((url.hostname === 'l.wl.co' && url.pathname === '/l') || url.hostname === 'urlsand.esvalabs.com') { 63 | url = new URL(url.searchParams.get('u')) 64 | } 65 | 66 | // Google Ads 67 | if (url.hostname.endsWith('doubleclick.net') || url.hostname.endsWith('googleadservices.com')) { 68 | let paramUrl = url.searchParams.getAll('adurl').at(-1) || url.searchParams.getAll('url').at(-1) || url.searchParams.getAll('ds_dest_url').at(-1) 69 | if (paramUrl) { 70 | paramUrl = paramUrl.replace(/^\/\//, 'https://') 71 | url = new URL(paramUrl) 72 | } 73 | } 74 | 75 | // Google Search 76 | // Google AMP does not redirect (e.g. 
google.com/amp/example.com) 77 | if (url.hostname.endsWith('google.com') && (url.pathname.startsWith('/url') || url.pathname.startsWith('/travel/clk'))) { 78 | const paramUrl = url.searchParams.get('q') || url.searchParams.get('url') || url.searchParams.get('pcurl') 79 | if (paramUrl) url = new URL(paramUrl) 80 | } 81 | 82 | // SES 83 | // https://github.com/uBlockOrigin/uAssets/blob/42e518277ab0c36d4b131aa01b4a8828af4e18b6/filters/privacy.txt#L866 84 | if (url.hostname.endsWith('awstrack.me') && url.pathname.startsWith('/L0')) { 85 | url = new URL(decodeURIComponent(url.pathname.match(/\/L0\/(http[^\/?#]+)/)[1])) 86 | } 87 | 88 | // DuckDuckGo 89 | if (url.hostname === 'duckduckgo.com' && url.pathname === '/l/') { 90 | url = new URL(url.searchParams.get('uddg')) 91 | } 92 | 93 | // Calendly 94 | if (url.hostname === 'calendly.com' && url.pathname === '/url') { 95 | url = new URL(url.searchParams.get('q')) 96 | } 97 | 98 | if (url.hostname.match(new RegExp(safeLinks.join('|')))) { 99 | return deSafelink(url.href) 100 | } 101 | 102 | return url.href 103 | } 104 | 105 | for await (const line of createInterface({ input: process.stdin, terminal: false })) { 106 | // parse hostname from url 107 | if (process.argv[2] === 'hostname') { 108 | if (URL.canParse(`http://${line}`)) { 109 | const url = new URL(`http://${line}`) 110 | 111 | console.log(url.hostname) 112 | } else { 113 | const hostname = line 114 | // host 115 | .split('/')[0] 116 | // exclude credential 117 | .replace(/.*@(.+)/, '$1') 118 | // exclude port 119 | .replace(/:\d+$/, '') 120 | // #2 121 | .split('?')[0] 122 | 123 | console.log(hostname) 124 | } 125 | } else { 126 | // Skip invalid domains, see #15 127 | if (line.split('/')[2].includes('??')) continue 128 | 129 | if (URL.canParse(line)) { 130 | const url = new URL(deSafelink(cleanHost(line))) 131 | 132 | url.host = cleanHost(url.host) 133 | 134 | // nodejs does not percent-encode ^ yet 135 | // https://github.com/nodejs/node/issues/57313 136 | url.pathname = caretPath(url.pathname) 137 | const outUrl = `${url.host}${url.pathname}${url.search}` 138 | // remove trailing slash from domain except path 139 | .replace(/(^[^/]*)\/+$/, '$1') 140 | 141 | console.log(outUrl) 142 | } else { 143 | const outUrl = caretPath(cleanHost(line 144 | // remove protocol 145 | .split('/').slice(2).join('/'))) 146 | // url encode space 147 | .replaceAll(' ', '%20') 148 | .replace(/(^[^/]*)\/+$/, '$1') 149 | 150 | console.log(outUrl) 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /LICENSE-CC0.md: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | ================== 3 | 4 | Statement of Purpose 5 | --------------------- 6 | 7 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 8 | 9 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 10 | 11 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 12 | 13 | 1. Copyright and Related Rights. 14 | -------------------------------- 15 | A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: 16 | 17 | i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 18 | ii. moral rights retained by the original author(s) and/or performer(s); 19 | iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 20 | iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 21 | v. rights protecting the extraction, dissemination, use and reuse of data in a Work; 22 | vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 23 | vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 24 | 25 | 2. Waiver. 26 | ----------- 27 | To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 28 | 29 | 3. Public License Fallback. 30 | ---------------------------- 31 | Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 32 | 33 | 4. Limitations and Disclaimers. 34 | -------------------------------- 35 | 36 | a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 37 | b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 38 | c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 39 | d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 40 | 41 | For more information, please see 42 | https://creativecommons.org/publicdomain/zero/1.0/ -------------------------------------------------------------------------------- /src/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if ! (set -o pipefail 2>/dev/null); then 4 | # dash does not support pipefail 5 | set -efx 6 | else 7 | set -efx -o pipefail 8 | fi 9 | 10 | # bash does not expand alias by default for non-interactive script 11 | if [ -n "$BASH_VERSION" ]; then 12 | shopt -s expand_aliases 13 | fi 14 | 15 | alias curl="curl -L" 16 | alias rm="rm -rf" 17 | 18 | ## Use GNU grep, busybox grep is not as performant 19 | DISTRO="" 20 | if [ -f "/etc/os-release" ]; then 21 | . 
"/etc/os-release" 22 | DISTRO="$ID" 23 | fi 24 | 25 | check_grep() { 26 | if [ -z "$(grep --help | grep 'GNU')" ]; then 27 | if [ -x "/usr/bin/grep" ]; then 28 | alias grep="/usr/bin/grep" 29 | check_grep 30 | else 31 | if [ "$DISTRO" = "alpine" ]; then 32 | echo "Please install GNU grep 'apk add grep'" 33 | else 34 | echo "GNU grep not found" 35 | fi 36 | exit 1 37 | fi 38 | fi 39 | } 40 | check_grep 41 | 42 | if ! command -v dos2unix &> /dev/null 43 | then 44 | if command -v busybox &> /dev/null 45 | then 46 | alias dos2unix="busybox dos2unix" 47 | else 48 | echo "dos2unix or busybox not found" 49 | exit 1 50 | fi 51 | fi 52 | 53 | if command -v unzip &> /dev/null 54 | then 55 | alias unzip="unzip -p" 56 | elif command -v busybox &> /dev/null 57 | then 58 | alias unzip="busybox unzip -p" 59 | elif command -v bsdunzip &> /dev/null 60 | then 61 | alias unzip="bsdunzip -p" 62 | else 63 | echo "unzip not found" 64 | exit 1 65 | fi 66 | 67 | ## Create a temporary working folder 68 | rm "tmp/" 69 | mkdir -p "tmp/" 70 | cd "tmp/" 71 | 72 | 73 | ## Prepare datasets 74 | curl "https://urlhaus.abuse.ch/downloads/csv/" -o "urlhaus.zip" 75 | curl "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip" 76 | curl "https://tranco-list.eu/download/daily/top-1m.csv.zip" -o "top-1m-tranco.zip" 77 | 78 | ## Cloudflare Radar 79 | if [ -n "$CF_API" ]; then 80 | mkdir -p "cf/" 81 | # Get the latest domain ranking buckets 82 | curl -X GET "https://api.cloudflare.com/client/v4/radar/datasets?limit=5&offset=0&datasetType=RANKING_BUCKET&format=json" \ 83 | -H "Authorization: Bearer $CF_API" -o "cf/datasets.json" 84 | # Get the top 1m bucket's dataset ID 85 | DATASET_ID=$(jq ".result.datasets[] | select(.meta.top==1000000) | .id" "cf/datasets.json") 86 | # Get the dataset download url 87 | curl --request POST \ 88 | --url "https://api.cloudflare.com/client/v4/radar/datasets/download" \ 89 | --header "Content-Type: application/json" \ 90 | --header "Authorization: Bearer $CF_API" \ 91 | --data "{ \"datasetId\": $DATASET_ID }" \ 92 | -o "cf/dataset-url.json" 93 | DATASET_URL=$(jq ".result.dataset.url" "cf/dataset-url.json" | sed 's/"//g') 94 | curl "$DATASET_URL" -o "cf/top-1m-radar.csv" 95 | 96 | ## Parse the Radar 1 Million 97 | cat "cf/top-1m-radar.csv" | \ 98 | dos2unix | \ 99 | tr "[:upper:]" "[:lower:]" | \ 100 | grep -F "." | \ 101 | sed "s/^www\.//" | \ 102 | sort -u > "top-1m-radar.txt" 103 | fi 104 | 105 | cp "../src/exclude.txt" "." 106 | cp "../src/exclude-url.txt" "." 
107 | 108 | ## Prepare URLhaus.csv 109 | unzip "urlhaus.zip" | \ 110 | # Convert DOS to Unix line ending 111 | dos2unix | \ 112 | tr "[:upper:]" "[:lower:]" | \ 113 | # Remove comment 114 | sed "/^#/d" > "URLhaus.csv" 115 | 116 | ## Parse URLs 117 | cat "URLhaus.csv" | \ 118 | cut -f 6 -d '"' | \ 119 | node "../src/clean_url.js" | \ 120 | sort -u > "urlhaus.txt" 121 | 122 | ## Parse domain and IP address only 123 | cat "urlhaus.txt" | \ 124 | node "../src/clean_url.js" hostname | \ 125 | sort -u > "urlhaus-domains.txt" 126 | 127 | ## Parse online URLs only 128 | cat "URLhaus.csv" | \ 129 | grep -F '"online"' | \ 130 | cut -f 6 -d '"' | \ 131 | node "../src/clean_url.js" | \ 132 | sort -u > "urlhaus-online.txt" 133 | 134 | cat "urlhaus-online.txt" | \ 135 | node "../src/clean_url.js" hostname | \ 136 | sort -u > "urlhaus-domains-online.txt" 137 | 138 | 139 | ## Parse the Umbrella 1 Million 140 | unzip "top-1m-umbrella.zip" | \ 141 | dos2unix | \ 142 | tr "[:upper:]" "[:lower:]" | \ 143 | # Parse domains only 144 | cut -f 2 -d "," | \ 145 | grep -F "." | \ 146 | # Remove www. 147 | sed "s/^www\.//" | \ 148 | sort -u > "top-1m-umbrella.txt" 149 | 150 | ## Parse the Tranco 1 Million 151 | if [ -n "$(file 'top-1m-tranco.zip' | grep 'Zip archive data')" ]; then 152 | unzip "top-1m-tranco.zip" | \ 153 | dos2unix | \ 154 | tr "[:upper:]" "[:lower:]" | \ 155 | # Parse domains only 156 | cut -f 2 -d "," | \ 157 | grep -F "." | \ 158 | # Remove www. 159 | sed "s/^www\.//" | \ 160 | sort -u > "top-1m-tranco.txt" 161 | else 162 | # tranco has unreliable download 163 | echo "top-1m-tranco.zip is not a zip, skipping it..." 164 | touch "top-1m-tranco.txt" 165 | fi 166 | 167 | # Merge Umbrella and self-maintained top domains 168 | cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \ 169 | sort -u > "top-1m-well-known.txt" 170 | 171 | if [ -n "$CF_API" ] && [ -f "top-1m-radar.txt" ]; then 172 | cat "top-1m-radar.txt" >> "top-1m-well-known.txt" 173 | # sort in-place 174 | sort "top-1m-well-known.txt" -u -o "top-1m-well-known.txt" 175 | fi 176 | 177 | 178 | cat "exclude-url.txt" | \ 179 | sed "/^#/d" | \ 180 | # "example.com/path" -> "^example\.com/path" 181 | # slash doesn't need to be escaped 182 | sed -e "s/^/^/" -e "s/\./\\\./g" > "exclude-url-grep.txt" 183 | 184 | ## Parse popular domains from URLhaus 185 | cat "urlhaus-domains.txt" | \ 186 | # grep match whole line 187 | grep -Fx -f "top-1m-well-known.txt" > "urlhaus-top-domains.txt" 188 | 189 | 190 | ## Parse domains from URLhaus excluding popular domains 191 | cat "urlhaus-domains.txt" | \ 192 | grep -F -vf "urlhaus-top-domains.txt" | \ 193 | # exclude domains from domains-based filters #110 194 | grep -vf "exclude-url-grep.txt" | \ 195 | # Remove blank lines 196 | sed "/^$/d" > "malware-domains.txt" 197 | 198 | cat "urlhaus-domains-online.txt" | \ 199 | grep -F -vf "urlhaus-top-domains.txt" | \ 200 | grep -vf "exclude-url-grep.txt" | \ 201 | sed "/^$/d" > "malware-domains-online.txt" 202 | 203 | ## Parse malware URLs from popular domains 204 | cat "urlhaus.txt" | \ 205 | grep -F -f "urlhaus-top-domains.txt" | \ 206 | # exclude domains/URLs from URL-based filters #110 207 | grep -vf "exclude-url-grep.txt" | \ 208 | sed "s/^/||/" | \ 209 | sed 's/$/^$all/' > "malware-url-top-domains.txt" 210 | 211 | cat "urlhaus-online.txt" | \ 212 | grep -F -f "urlhaus-top-domains.txt" | \ 213 | grep -vf "exclude-url-grep.txt" | \ 214 | sed "s/^/||/" | \ 215 | sed 's/$/^$all/' > "malware-url-top-domains-online.txt" 216 | 217 | cat "urlhaus-online.txt" | \ 
218 | grep -F -f "urlhaus-top-domains.txt" | \ 219 | grep -vf "exclude-url-grep.txt" > "malware-url-top-domains-raw-online.txt" 220 | 221 | 222 | ## Merge malware domains and URLs 223 | CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") 224 | FIRST_LINE="! Title: Malicious URL Blocklist" 225 | SECOND_LINE="! Updated: $CURRENT_TIME" 226 | THIRD_LINE="! Expires: 12 hours (update frequency)" 227 | FOURTH_LINE="! Homepage: https://gitlab.com/malware-filter/urlhaus-filter" 228 | FIFTH_LINE="! License: https://gitlab.com/malware-filter/urlhaus-filter#license" 229 | SIXTH_LINE="! Source: https://urlhaus.abuse.ch/api/" 230 | COMMENT_ABP="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE" 231 | 232 | mkdir -p "../public/" 233 | 234 | cat "malware-domains.txt" "malware-url-top-domains.txt" | \ 235 | sed "1i $COMMENT_ABP" > "../public/urlhaus-filter.txt" 236 | 237 | cat "malware-domains-online.txt" "malware-url-top-domains-online.txt" | \ 238 | sed "1i $COMMENT_ABP" | \ 239 | sed "1s/Malicious/Online Malicious/" > "../public/urlhaus-filter-online.txt" 240 | 241 | 242 | # Adguard Home (#19, #22) 243 | cat "malware-domains.txt" | \ 244 | sed "s/^/||/" | \ 245 | sed "s/$/^/" | \ 246 | sort -u > "malware-domains-adguard-home.txt" 247 | 248 | cat "malware-domains-online.txt" | \ 249 | sed "s/^/||/" | \ 250 | sed "s/$/^/" > "malware-domains-online-adguard-home.txt" 251 | 252 | cat "malware-domains-adguard-home.txt" | \ 253 | sed "1i $COMMENT_ABP" | \ 254 | sed "1s/Blocklist/Blocklist (AdGuard Home)/" > "../public/urlhaus-filter-agh.txt" 255 | 256 | cat "malware-domains-online-adguard-home.txt" | \ 257 | sed "1i $COMMENT_ABP" | \ 258 | sed "1s/Malicious/Online Malicious/" | \ 259 | sed "1s/Blocklist/Blocklist (AdGuard Home)/" > "../public/urlhaus-filter-agh-online.txt" 260 | 261 | 262 | # Adguard browser extension 263 | cat "malware-domains.txt" | \ 264 | sed "s/^/||/" | \ 265 | sed 's/$/^$all/' > "malware-domains-adguard.txt" 266 | 267 | cat "malware-domains-online.txt" | \ 268 | sed "s/^/||/" | \ 269 | sed 's/$/^$all/' > "malware-domains-online-adguard.txt" 270 | 271 | cat "malware-domains-adguard.txt" "malware-url-top-domains.txt" | \ 272 | sed "1i $COMMENT_ABP" | \ 273 | sed "1s/Blocklist/Blocklist (AdGuard)/" > "../public/urlhaus-filter-ag.txt" 274 | 275 | cat "malware-domains-online-adguard.txt" "malware-url-top-domains-online.txt" | \ 276 | sed "1i $COMMENT_ABP" | \ 277 | sed "1s/Malicious/Online Malicious/" | \ 278 | sed "1s/Blocklist/Blocklist (AdGuard)/" > "../public/urlhaus-filter-ag-online.txt" 279 | 280 | 281 | # Vivaldi 282 | cat "malware-domains.txt" | \ 283 | sed "s/^/||/" | \ 284 | sed 's/$/^$document/' > "malware-domains-vivaldi.txt" 285 | 286 | cat "malware-domains-online.txt" | \ 287 | sed "s/^/||/" | \ 288 | sed 's/$/^$document/' > "malware-domains-online-vivaldi.txt" 289 | 290 | cat "malware-domains-vivaldi.txt" "malware-url-top-domains.txt" | \ 291 | sed 's/\$all$/$document/' | \ 292 | sed "1i $COMMENT_ABP" | \ 293 | sed "1s/Blocklist/Blocklist (Vivaldi)/" > "../public/urlhaus-filter-vivaldi.txt" 294 | 295 | cat "malware-domains-online-vivaldi.txt" "malware-url-top-domains-online.txt" | \ 296 | sed 's/\$all$/$document/' | \ 297 | sed "1i $COMMENT_ABP" | \ 298 | sed "1s/Malicious/Online Malicious/" | \ 299 | sed "1s/Blocklist/Blocklist (Vivaldi)/" > "../public/urlhaus-filter-vivaldi-online.txt" 300 | 301 | 302 | ## Domains-only blocklist 303 | # awk + head is a workaround for sed prepend 304 | COMMENT=$(printf "$COMMENT_ABP" | sed "s/^!/#/" | sed 
"1s/URL/Domains/" | awk '{printf "%s\\n", $0}' | head -c -2) 305 | COMMENT_ONLINE=$(printf "$COMMENT" | sed "1s/Malicious/Online Malicious/" | awk '{printf "%s\\n", $0}' | head -c -2) 306 | 307 | cat "malware-domains.txt" | \ 308 | # remove IPv6 bracket 309 | sed -r "s/\[|\]//g" | \ 310 | sed "1i $COMMENT" > "../public/urlhaus-filter-domains.txt" 311 | 312 | cat "malware-domains-online.txt" | \ 313 | sed -r "s/\[|\]//g" | \ 314 | sed "1i $COMMENT_ONLINE" > "../public/urlhaus-filter-domains-online.txt" 315 | 316 | 317 | ## Hosts only 318 | cat "malware-domains.txt" | \ 319 | # exclude IPv4 320 | grep -vE "^([0-9]{1,3}[\.]){3}[0-9]{1,3}$" | \ 321 | # exclude IPv6 322 | grep -vE "^\[" > "malware-hosts.txt" 323 | 324 | cat "malware-domains-online.txt" | \ 325 | grep -vE "^([0-9]{1,3}[\.]){3}[0-9]{1,3}$" | \ 326 | grep -vE "^\[" > "malware-hosts-online.txt" 327 | 328 | 329 | ## Hosts file blocklist 330 | cat "malware-hosts.txt" | \ 331 | sed "s/^/0.0.0.0 /" | \ 332 | # Re-insert comment 333 | sed "1i $COMMENT" | \ 334 | sed "1s/Domains/Hosts/" > "../public/urlhaus-filter-hosts.txt" 335 | 336 | cat "malware-hosts-online.txt" | \ 337 | sed "s/^/0.0.0.0 /" | \ 338 | sed "1i $COMMENT_ONLINE" | \ 339 | sed "1s/Domains/Hosts/" > "../public/urlhaus-filter-hosts-online.txt" 340 | 341 | 342 | ## Dnsmasq-compatible blocklist 343 | cat "malware-hosts.txt" | \ 344 | sed "s/^/address=\//" | \ 345 | sed "s/$/\/0.0.0.0/" | \ 346 | sed "1i $COMMENT" | \ 347 | sed "1s/Blocklist/dnsmasq Blocklist/" > "../public/urlhaus-filter-dnsmasq.conf" 348 | 349 | cat "malware-hosts-online.txt" | \ 350 | sed "s/^/address=\//" | \ 351 | sed "s/$/\/0.0.0.0/" | \ 352 | sed "1i $COMMENT_ONLINE" | \ 353 | sed "1s/Blocklist/dnsmasq Blocklist/" > "../public/urlhaus-filter-dnsmasq-online.conf" 354 | 355 | 356 | ## BIND-compatible blocklist 357 | cat "malware-hosts.txt" | \ 358 | sed 's/^/zone "/' | \ 359 | sed 's/$/" { type master; notify no; file "null.zone.file"; };/' | \ 360 | sed "1i $COMMENT" | \ 361 | sed "1s/Blocklist/BIND Blocklist/" > "../public/urlhaus-filter-bind.conf" 362 | 363 | cat "malware-hosts-online.txt" | \ 364 | sed 's/^/zone "/' | \ 365 | sed 's/$/" { type master; notify no; file "null.zone.file"; };/' | \ 366 | sed "1i $COMMENT_ONLINE" | \ 367 | sed "1s/Blocklist/BIND Blocklist/" > "../public/urlhaus-filter-bind-online.conf" 368 | 369 | 370 | ## DNS Response Policy Zone (RPZ) 371 | CURRENT_UNIX_TIME="$(date +%s)" 372 | RPZ_SYNTAX="\n\$TTL 30\n@ IN SOA localhost. root.localhost. 
$CURRENT_UNIX_TIME 86400 3600 604800 30\n NS localhost.\n" 373 | 374 | cat "malware-hosts.txt" | \ 375 | sed "s/$/ CNAME ./" | \ 376 | sed '1 i\'"$RPZ_SYNTAX"'' | \ 377 | sed "1i $COMMENT" | \ 378 | sed "s/^#/;/" | \ 379 | sed "1s/Blocklist/RPZ Blocklist/" > "../public/urlhaus-filter-rpz.conf" 380 | 381 | cat "malware-hosts-online.txt" | \ 382 | sed "s/$/ CNAME ./" | \ 383 | sed '1 i\'"$RPZ_SYNTAX"'' | \ 384 | sed "1i $COMMENT_ONLINE" | \ 385 | sed "s/^#/;/" | \ 386 | sed "1s/Blocklist/RPZ Blocklist/" > "../public/urlhaus-filter-rpz-online.conf" 387 | 388 | 389 | ## Unbound-compatible blocklist 390 | cat "malware-hosts.txt" | \ 391 | sed 's/^/local-zone: "/' | \ 392 | sed 's/$/" always_nxdomain/' | \ 393 | sed "1i $COMMENT" | \ 394 | sed "1s/Blocklist/Unbound Blocklist/" > "../public/urlhaus-filter-unbound.conf" 395 | 396 | cat "malware-hosts-online.txt" | \ 397 | sed 's/^/local-zone: "/' | \ 398 | sed 's/$/" always_nxdomain/' | \ 399 | sed "1i $COMMENT_ONLINE" | \ 400 | sed "1s/Blocklist/Unbound Blocklist/" > "../public/urlhaus-filter-unbound-online.conf" 401 | 402 | 403 | ## dnscrypt-proxy blocklists 404 | # name-based 405 | cat "malware-hosts.txt" | \ 406 | sed "1i $COMMENT" | \ 407 | sed "1s/Domains/Names/" > "../public/urlhaus-filter-dnscrypt-blocked-names.txt" 408 | 409 | cat "malware-hosts-online.txt" | \ 410 | sed "1i $COMMENT_ONLINE" | \ 411 | sed "1s/Domains/Names/" > "../public/urlhaus-filter-dnscrypt-blocked-names-online.txt" 412 | 413 | # IPv4/6 414 | if grep -Eq "^(([0-9]{1,3}[\.]){3}[0-9]{1,3}$|\[)" "malware-domains.txt"; then 415 | cat "malware-domains.txt" | \ 416 | grep -E "^(([0-9]{1,3}[\.]){3}[0-9]{1,3}$|\[)" | \ 417 | sed -r "s/\[|\]//g" | \ 418 | sed "1i $COMMENT" | \ 419 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips.txt" 420 | 421 | cat "malware-domains-online.txt" | \ 422 | grep -E "^(([0-9]{1,3}[\.]){3}[0-9]{1,3}$|\[)" | \ 423 | sed -r "s/\[|\]//g" | \ 424 | sed "1i $COMMENT_ONLINE" | \ 425 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips-online.txt" 426 | else 427 | echo | \ 428 | sed "1i $COMMENT" | \ 429 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips.txt" 430 | 431 | echo | \ 432 | sed "1i $COMMENT_ONLINE" | \ 433 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips-online.txt" 434 | fi 435 | 436 | ## Wildcard subdomain 437 | cat "malware-domains.txt" | \ 438 | sed "s/^/*./" | \ 439 | sed "1i $COMMENT" | \ 440 | sed "1s/Blocklist/Wildcard Asterisk Blocklist/" > "../public/urlhaus-filter-wildcard.txt" 441 | 442 | cat "malware-domains-online.txt" | \ 443 | sed "s/^/*./" | \ 444 | sed "1i $COMMENT_ONLINE" | \ 445 | sed "1s/Blocklist/Wildcard Asterisk Blocklist/" > "../public/urlhaus-filter-wildcard-online.txt" 446 | 447 | 448 | # Snort, Suricata, Splunk 449 | rm "../public/urlhaus-filter-snort2-online.rules" \ 450 | "../public/urlhaus-filter-snort3-online.rules" \ 451 | "../public/urlhaus-filter-suricata-online.rules" \ 452 | "../public/urlhaus-filter-suricata-sni-online.rules" \ 453 | "../public/urlhaus-filter-splunk-online.csv" 454 | 455 | export CURRENT_TIME 456 | node "../src/ids.js" 457 | 458 | sed -i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-snort2-online.rules" 459 | sed -i "1s/Domains Blocklist/URL Snort2 Ruleset/" "../public/urlhaus-filter-snort2-online.rules" 460 | 461 | sed -i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-snort3-online.rules" 462 | sed -i "1s/Domains Blocklist/URL Snort3 Ruleset/" "../public/urlhaus-filter-snort3-online.rules" 463 | 464 | sed
-i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-suricata-online.rules" 465 | sed -i "1s/Domains Blocklist/URL Suricata Ruleset/" "../public/urlhaus-filter-suricata-online.rules" 466 | 467 | sed -i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-suricata-sni-online.rules" 468 | sed -i "1s/Domains Blocklist/Domains Suricata Ruleset (SNI)/" "../public/urlhaus-filter-suricata-sni-online.rules" 469 | 470 | sed -i -e "1i $COMMENT_ONLINE" -e '1i "host","path","message","updated"' "../public/urlhaus-filter-splunk-online.csv" 471 | sed -i "1s/Domains Blocklist/URL Splunk Lookup/" "../public/urlhaus-filter-splunk-online.csv" 472 | 473 | 474 | ## IE blocklist 475 | COMMENT_IE="msFilterList\n$COMMENT\n: Expires=1\n#" 476 | COMMENT_ONLINE_IE="msFilterList\n$COMMENT_ONLINE\n: Expires=1\n#" 477 | 478 | cat "malware-domains.txt" | \ 479 | sed -r "s/\[|\]//g" | \ 480 | sed "s/^/-d /" | \ 481 | sed "1i $COMMENT_IE" | \ 482 | sed "2s/Domains Blocklist/Hosts Blocklist (IE)/" > "../public/urlhaus-filter.tpl" 483 | 484 | cat "malware-domains-online.txt" | \ 485 | sed -r "s/\[|\]//g" | \ 486 | sed "s/^/-d /" | \ 487 | sed "1i $COMMENT_ONLINE_IE" | \ 488 | sed "2s/Domains Blocklist/Hosts Blocklist (IE)/" > "../public/urlhaus-filter-online.tpl" 489 | 490 | 491 | ## Clean up artifacts 492 | rm "URLhaus.csv" "top-1m-umbrella.zip" "top-1m-umbrella.txt" "top-1m-tranco.txt" "cf/" "top-1m-radar.txt" 493 | 494 | 495 | cd ../ 496 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Malicious URL Blocklist 2 | 3 | - [Lite version](#lite-version-online-links-only) 4 | - [Full version](#full-version) 5 | - Formats 6 | - [URL-based](#url-based) 7 | - [Domain-based](#domain-based) 8 | - [Wildcard asterisk](#wildcard-asterisk) 9 | - [Hosts-based](#hosts-based) 10 | - [Domain-based (AdGuard Home)](#domain-based-adguard-home) 11 | - [URL-based (AdGuard)](#url-based-adguard) 12 | - [URL-based (Vivaldi)](#url-based-vivaldi) 13 | - [Dnsmasq](#dnsmasq) 14 | - [BIND zone](#bind) 15 | - [RPZ](#response-policy-zone) 16 | - [Unbound](#unbound) 17 | - [dnscrypt-proxy](#dnscrypt-proxy) 18 | - [Snort2](#snort2) 19 | - [Snort3](#snort3) 20 | - [Suricata](#suricata) 21 | - [Suricata (SNI)](#suricata-sni) 22 | - [Splunk](#splunk) 23 | - [Tracking Protection List (IE)](#tracking-protection-list-ie) 24 | - [Compressed version](#compressed-version) 25 | - [Reporting issues](#issues) 26 | - [Cloning](#cloning) 27 | - [FAQ and Guides](#faq-and-guides) 28 | - [CI Variables](#ci-variables) 29 | - [License](#license) 30 | 31 | A blocklist of malicious websites that are being used for malware distribution, based on the **Database dump (CSV)** of Abuse.ch [URLhaus](https://urlhaus.abuse.ch/). Blocklist is updated twice a day. 32 | 33 | ## Lite version (online links only) 34 | 35 | Online status of URLs is checked by URLhaus. 
36 | 37 | | Client | mirror 1 | mirror 2 | mirror 3 | mirror 4 | mirror 5 | mirror 6 | 38 | | --- | --- | --- | --- | --- | --- | --- | 39 | | [uBlock Origin](#url-based) ([*](#youtube-compatibility)) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-online.txt) | 40 | | [AdGuard Home/Pi-hole](#domain-based-adguard-home) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-agh-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-agh-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-agh-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt) | 41 | | [AdGuard (browser extension)](#url-based-adguard) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-ag-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-ag-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-ag-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-ag-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-ag-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-ag-online.txt) | 42 | | [Vivaldi/Brave](#url-based-vivaldi) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-vivaldi-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-vivaldi-online.txt) | 43 | | [Hosts](#hosts-based) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-hosts-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-hosts-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-hosts-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-hosts-online.txt) | 44 | | [Dnsmasq](#dnsmasq) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-dnsmasq-online.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-dnsmasq-online.conf) | 45 | | BIND [zone](#bind) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-bind-online.conf) | 
[link](https://curbengh.github.io/malware-filter/urlhaus-filter-bind-online.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-bind-online.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-bind-online.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-bind-online.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-bind-online.conf) | 46 | | BIND [RPZ](#response-policy-zone) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-rpz-online.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-rpz-online.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-rpz-online.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-rpz-online.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-rpz-online.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-rpz-online.conf) | 47 | | [dnscrypt-proxy](#dnscrypt-proxy) | [names-online.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | 48 | | [blocky](#wildcard-asterisk) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-wildcard-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-wildcard-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-wildcard-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-wildcard-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-wildcard-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-wildcard-online.txt) | 49 | | [Snort2](#snort2) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-snort2-online.rules) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-snort2-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-snort2-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-snort2-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-snort2-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-snort2-online.rules) | 50 | | [Snort3](#snort3) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-snort3-online.rules) |
[link](https://curbengh.github.io/malware-filter/urlhaus-filter-snort3-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-snort3-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-snort3-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-snort3-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-snort3-online.rules) | 51 | | [Suricata](#suricata) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-suricata-online.rules) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-suricata-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-suricata-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-suricata-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-suricata-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-suricata-online.rules) | 52 | | [Suricata (SNI)](#suricata-sni) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-suricata-sni-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-suricata-sni-online.rules) | 53 | | [Splunk](#splunk) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-splunk-online.csv) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-splunk-online.csv) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-splunk-online.csv) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-splunk-online.csv) | [link](https://malware-filter.pages.dev/urlhaus-filter-splunk-online.csv) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-splunk-online.csv) | 54 | | [Internet Explorer](#tracking-protection-list-ie) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-online.tpl) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-online.tpl) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-online.tpl) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-online.tpl) | [link](https://malware-filter.pages.dev/urlhaus-filter-online.tpl) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-online.tpl) | 55 | 56 | ## Full version 57 | 58 | | Client | mirror 1 | mirror 2 | mirror 3 | mirror 4 | mirror 5 | mirror 6 | 59 | | --- | --- | --- | --- | --- | --- | --- | 60 | | [uBlock Origin](#url-based) ([*](#youtube-compatibility)) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter.txt) | 61 | | [AdGuard Home/Pi-hole](#domain-based-adguard-home) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-agh.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-agh.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh.txt) |
[link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-agh.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-agh.txt) | 62 | | [AdGuard (browser extension)](#url-based-adguard) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-ag.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-ag.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-ag.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-ag.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-ag.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-ag.txt) | 63 | | [Vivaldi/Brave](#url-based-vivaldi) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-vivaldi.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-vivaldi.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-vivaldi.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-vivaldi.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-vivaldi.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-vivaldi.txt) | 64 | | [Hosts](#hosts-based) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-hosts.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-hosts.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-hosts.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt) | 65 | | [Dnsmasq](#dnsmasq) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnsmasq.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-dnsmasq.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnsmasq.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnsmasq.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-dnsmasq.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-dnsmasq.conf) | 66 | | BIND [zone](#bind) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-bind.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-bind.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-bind.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-bind.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-bind.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-bind.conf) | 67 | | BIND [RPZ](#response-policy-zone) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-rpz.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-rpz.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-rpz.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-rpz.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-rpz.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-rpz.conf) | 68 | | [dnscrypt-proxy](#dnscrypt-proxy) | [names.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt), 
[ips.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips.txt) | 69 | | [blocky](#wildcard-asterisk) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-wildcard.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-wildcard.txt) | [link](https://curbengh.github.io/phishing-filter/urlhaus-filter-wildcard.txt) | [link](https://malware-filter.gitlab.io/phishing-filter/urlhaus-filter-wildcard.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-wildcard.txt) | [link](https://phishing-filter.pages.dev/urlhaus-filter-wildcard.txt) | 70 | | [Internet Explorer](#tracking-protection-list-ie) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.tpl) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter.tpl) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter.tpl) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter.tpl) | [link](https://malware-filter.pages.dev/urlhaus-filter.tpl) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter.tpl) | 71 | 72 | For other programs, see [Compatibility](https://gitlab.com/malware-filter/malware-filter/wikis/compatibility) page in the wiki. 73 | 74 | Check out my other filters: 75 | 76 | - [phishing-filter](https://gitlab.com/malware-filter/phishing-filter) 77 | - [pup-filter](https://gitlab.com/malware-filter/pup-filter) 78 | - [tracking-filter](https://gitlab.com/malware-filter/tracking-filter) 79 | - [vn-badsite-filter](https://gitlab.com/malware-filter/vn-badsite-filter) 80 | 81 | ## URL-based 82 | 83 | Import the full version into uBO to block online and **offline** malicious websites. 84 | 85 | Lite version includes **online** links only. Enabled by default in uBO >=[1.28.2](https://github.com/gorhill/uBlock/releases/tag/1.28.2) 86 | 87 | **Note:** Lite version is 99% smaller by excluding offline urls. The status of urls is determined by the upstream Abuse.ch. However, the test is not 100% accurate and some malicious urls that are otherwise accessible may be missed. If bandwidth (9 MB/day) is not a constraint, I recommend the regular version; browser extensions may utilise [HTTP compression](https://developer.mozilla.org/en-US/docs/Web/HTTP/Compression) that can save 70% of bandwidth. 88 | 89 | Regular version contains >260K filters, do note that uBO can [easily handle](https://github.com/uBlockOrigin/uBlock-issues/issues/338#issuecomment-452843669) 500K filters. 90 | 91 | If you've installed the lite version but prefer to use the regular version, it's better to remove it beforehand. Having two versions at the same time won't cause any conflict issue, uBO can detect duplicate network filters and adjust accordingly, but it's a waste of your bandwidth. 
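To gauge the saving yourself (optional; exact numbers vary between builds), you can compare the transfer size of the full list with and without compression:

```sh
# Transfer size without compression
curl -so /dev/null -w 'identity: %{size_download} bytes\n' \
  "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt"

# Transfer size when the server is allowed to gzip the response.
# Without --compressed, curl leaves the body compressed, so
# size_download reports the compressed byte count.
curl -so /dev/null -w 'gzip: %{size_download} bytes\n' \
  -H 'Accept-Encoding: gzip' \
  "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt"
```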
**AdGuard Home** users should use [this blocklist](#domain-based-adguard-home).

### Youtube compatibility

The [AdGuard format](#url-based-adguard) may have fewer YouTube [issues](https://github.com/gorhill/uBlock/commit/402e2ebf57).

## URL-based (AdGuard)

Import the full version into AdGuard browser extensions to block online and **offline** malicious websites.

The lite version includes **online** links only.

## URL-based (Vivaldi)

For Vivaldi, the blocking level must be at least "Block Trackers". Import the full version into Vivaldi's **Tracker Blocking Sources** to block online and **offline** malicious websites.

For Brave, "Trackers & ads blocking" must be set to Aggressive. Import the list under Shields > Content filtering > Add custom filter lists.

The lite version includes **online** links only.

## Domain-based

This blocklist includes domains and IP addresses.

## Wildcard asterisk

This blocklist includes domains and IP addresses.

## Domain-based (AdGuard Home)

This AdGuard Home-compatible blocklist includes domains and IP addresses. It is also compatible with Pi-hole.

## Hosts-based

This blocklist includes domains only.

## Dnsmasq

This blocklist includes domains only.

Save the ruleset to "/usr/local/etc/dnsmasq/urlhaus-filter-dnsmasq.conf". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure dnsmasq to use the blocklist:

`printf "\nconf-file=/usr/local/etc/dnsmasq/urlhaus-filter-dnsmasq.conf\n" >> /etc/dnsmasq.conf`

## BIND

This blocklist includes domains only.

Save the ruleset to "/usr/local/etc/bind/urlhaus-filter-bind.conf". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure BIND to use the blocklist:

`printf '\ninclude "/usr/local/etc/bind/urlhaus-filter-bind.conf";\n' >> /etc/bind/named.conf`

Add this to "/etc/bind/null.zone.file" (skip this step if the file already exists):

```
$TTL 86400 ; one day
@ IN SOA ns.nullzone.loc. ns.nullzone.loc. (
        2017102203
        28800
        7200
        864000
        86400 )
  NS ns.nullzone.loc.
  A 0.0.0.0
@ IN A 0.0.0.0
* IN A 0.0.0.0
```

The zone file is derived from [here](https://github.com/tomzuu/blacklist-named/blob/master/null.zone.file).

## Response Policy Zone

This blocklist includes domains only.

## Unbound

This blocklist includes domains only.

Save the ruleset to "/usr/local/etc/unbound/urlhaus-filter-unbound.conf". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Unbound to use the blocklist:

`printf '\n include: "/usr/local/etc/unbound/urlhaus-filter-unbound.conf"\n' >> /etc/unbound/unbound.conf`

## dnscrypt-proxy

Save the rulesets to "/etc/dnscrypt-proxy/". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.
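As a minimal auto-update sketch (the wiki guide above is the canonical approach; paths, URLs and the service name below are assumptions to adjust for your system), a daily cron job could re-download both rulesets:

```sh
#!/bin/sh
# Illustrative only, e.g. saved as /etc/cron.daily/urlhaus-filter.
# Re-downloads both dnscrypt-proxy rulesets, then restarts the resolver.
set -eu

curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt" \
  -o /etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-names.txt
curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips.txt" \
  -o /etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-ips.txt

# Assumes a systemd service named "dnscrypt-proxy"; adjust for your init system.
systemctl restart dnscrypt-proxy
```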
Configure dnscrypt-proxy to use the blocklist:

```diff
[blocked_names]
+ blocked_names_file = '/etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-names.txt'

[blocked_ips]
+ blocked_ips_file = '/etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-ips.txt'
```

## Snort2

This ruleset includes online URLs only. It is not compatible with [Snort3](#snort3).

Save the ruleset to "/etc/snort/rules/urlhaus-filter-snort2-online.rules". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Snort to use the ruleset:

`printf "\ninclude \$RULE_PATH/urlhaus-filter-snort2-online.rules\n" >> /etc/snort/snort.conf`

## Snort3

This ruleset includes online URLs only. It is not compatible with [Snort2](#snort2).

Save the ruleset to "/etc/snort/rules/urlhaus-filter-snort3-online.rules". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Snort to use the ruleset:

```diff
# /etc/snort/snort.lua
ips =
{
  variables = default_variables,
+ include = 'rules/urlhaus-filter-snort3-online.rules'
}
```

## Suricata

This ruleset includes online URLs only.

Save the ruleset to "/etc/suricata/rules/urlhaus-filter-suricata-online.rules". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Suricata to use the ruleset:

```diff
# /etc/suricata/suricata.yaml
rule-files:
  - local.rules
+ - urlhaus-filter-suricata-online.rules
```

### Suricata (SNI)

This ruleset includes online domains only. It enables Suricata to detect malicious HTTPS-enabled domains by inspecting the SNI in the [unencrypted Client Hello](https://en.wikipedia.org/wiki/Server_Name_Indication#Security_implications) message. Note, however, that support for Encrypted Client Hello, which defeats SNI inspection, is increasing.

## Splunk

A CSV file for Splunk [lookup](https://docs.splunk.com/Documentation/Splunk/latest/Knowledge/Aboutlookupsandfieldactions). This ruleset includes online URLs only.

Either upload the file via the GUI, or save it in `$SPLUNK_HOME/etc/system/lookups` or an app-specific `$SPLUNK_HOME/etc/apps/<app>/lookups`.

Alternatively, use the [malware-filter add-on](https://splunkbase.splunk.com/app/6970) to install this lookup and optionally auto-update it.

Columns:

| host | path | message | updated |
| --- | --- | --- | --- |
| example.com | | urlhaus-filter malicious website detected | 2022-12-21T12:34:56Z |
| example2.com | /some-path | urlhaus-filter malicious website detected | 2022-12-21T12:34:56Z |
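For a scripted install instead of the GUI upload (a sketch, assuming a default `$SPLUNK_HOME` layout), the CSV can be fetched directly into the lookups directory:

```sh
# Illustrative only: fetch the lookup CSV into Splunk's system lookups directory.
curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-splunk-online.csv" \
  -o "$SPLUNK_HOME/etc/system/lookups/urlhaus-filter-splunk-online.csv"
```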
## Tracking Protection List (IE)

This blocklist includes domains and IP addresses. It is supported in Internet Explorer 9+. [Install guide](https://superuser.com/a/550539)

## Third-party mirrors

- iosprivacy/urlhaus-filter-mirror

TBC

## Compressed version

All filters are also available in gzip-, brotli- and zstd-compressed versions.

- Gzip: https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.gz
- Brotli: https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.br
- Zstd: https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.zst
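For example, to download the gzip variant and decompress it on the fly:

```sh
# gzip acts as a stream filter when reading from stdin
curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.gz" | gzip -d > urlhaus-filter.txt
```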
## Issues

This blocklist operates by blocking the **whole** website, instead of specific webpages; exceptions are made for popular websites (e.g. `https://docs.google.com/`), for which specific webpages are listed instead (e.g. `https://docs.google.com/malware-page`). Malicious webpages are only listed in the [URL-based](#url-based) filter; popular websites are excluded from the other filters.

_Popular_ websites are those listed in the [Umbrella Popularity List](https://s3-us-west-1.amazonaws.com/umbrella-static/index.html) (top 1M domains + subdomains), the [Tranco List](https://tranco-list.eu/) (top 1M domains), [Cloudflare Radar](https://developers.cloudflare.com/radar/investigate/domain-ranking-datasets/) (top 1M domains) and this [custom list](src/exclude.txt).

If you wish to exclude certain website(s) that you believe are sufficiently well-known, please create an [issue](https://gitlab.com/malware-filter/urlhaus-filter/issues) or [merge request](https://gitlab.com/malware-filter/urlhaus-filter/merge_requests). If the website is quite obscure but you still want to visit it, you can add a new line `||legitsite.com^$badfilter` to the "My filters" tab of uBO; use a subdomain if relevant, e.g. `||sub.legitsite.com^$badfilter`.

This filter **only** accepts new malware URLs from [URLhaus](https://urlhaus.abuse.ch/).

Please report new malware URLs to the upstream maintainer through https://urlhaus.abuse.ch/api/#submit.

## Cloning

Getting the last five revisions should be sufficient for a valid MR:

`git clone --depth 5 https://gitlab.com/malware-filter/urlhaus-filter.git`

## FAQ and Guides

See the [wiki](https://gitlab.com/malware-filter/malware-filter/-/wikis/home).

## CI Variables

Optional variables:

- `CLOUDFLARE_BUILD_HOOK`: Deploy to Cloudflare Pages.
- `NETLIFY_SITE_ID`: Deploy to Netlify.
- `CF_API`: Include the Cloudflare Radar [domains ranking](https://developers.cloudflare.com/radar/investigate/domain-ranking-datasets/). [Guide](https://developers.cloudflare.com/radar/get-started/first-request/) to creating an API token.

## Repository Mirrors

https://gitlab.com/curben/blog#repository-mirrors

## License

[Creative Commons Zero v1.0 Universal](LICENSE-CC0.md) and [MIT License](LICENSE)

[URLhaus](https://urlhaus.abuse.ch/): [CC0](https://creativecommons.org/publicdomain/zero/1.0/)

[Tranco List](https://tranco-list.eu/): [MIT License](https://choosealicense.com/licenses/mit/)

[Umbrella Popularity List](https://s3-us-west-1.amazonaws.com/umbrella-static/index.html): Available free of charge by Cisco Umbrella

[Cloudflare Radar](https://developers.cloudflare.com/radar/investigate/domain-ranking-datasets/): Available to free Cloudflare accounts

This repository is not endorsed by Abuse.ch.