├── .nvmrc
├── .gitignore
├── wrangler.toml
├── package.json
├── src
│   ├── exclude-url.txt
│   ├── gitlab_status.sh
│   ├── build.js
│   ├── exclude.txt
│   ├── ids.js
│   ├── clean_url.js
│   └── script.sh
├── LICENSE
├── .gitlab-ci.yml
├── .github
│   └── workflows
│       └── pages.yml
├── LICENSE-CC0.md
└── README.md
/.nvmrc: -------------------------------------------------------------------------------- 1 | lts/* 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp/ 2 | .vscode/ 3 | public/ 4 | node_modules/ 5 | -------------------------------------------------------------------------------- /wrangler.toml: -------------------------------------------------------------------------------- 1 | name = "urlhaus-filter" 2 | pages_build_output_dir = "public" 3 | 4 | [vars] 5 | ASDF_NODEJS_LEGACY_FILE_DYNAMIC_STRATEGY = "latest_available" 6 | 7 | [env.production.vars] 8 | ASDF_NODEJS_LEGACY_FILE_DYNAMIC_STRATEGY = "latest_available" 9 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "urlhaus-filter", 3 | "private": true, 4 | "scripts": { 5 | "build": "node src/build.js" 6 | }, 7 | "dependencies": { 8 | "unzipper": "^0.12.3" 9 | }, 10 | "engines": { 11 | "node": ">= 18.12.0" 12 | }, 13 | "type": "module" 14 | } 15 | -------------------------------------------------------------------------------- /src/exclude-url.txt: -------------------------------------------------------------------------------- 1 | # URL exclusion list 2 | # Domains/URLs listed here will be excluded from domain-based and URL-based filters 3 | # Any entry with slash (/) will not be applied to domain-based filters 4 | # Include only top 1m (sub)domains that do not host user content 5 | github.githubassets.com 6 | -------------------------------------------------------------------------------- /src/gitlab_status.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | ARTIFACT_STATUS=$(curl -sSIL "https://gitlab.com/malware-filter/urlhaus-filter/-/jobs/artifacts/main/download?job=pages" | grep -F "HTTP/2 200") 4 | PIPELINE_STATUS=$(curl -sSL "https://gitlab.com/malware-filter/urlhaus-filter/badges/main/pipeline.svg" | grep -F "failed") 5 | GITLAB_STATUS="up" 6 | 7 | if [ -z "$ARTIFACT_STATUS" ] || [ -n "$PIPELINE_STATUS" ]; then 8 | GITLAB_STATUS="down" 9 | fi 10 | 11 | echo "GITLAB_STATUS=$GITLAB_STATUS" >> "$GITHUB_ENV" 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Ming Di Leom 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: node:lts-alpine 2 | 3 | include: 4 | - template: Security/Secret-Detection.gitlab-ci.yml 5 | 6 | # Only run pipeline when scheduled or "Run pipeline" in the main branch 7 | workflow: 8 | rules: 9 | - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH && ($CI_PIPELINE_SOURCE == "schedule" || $CI_PIPELINE_SOURCE == "web")' 10 | 11 | build_job: 12 | stage: build 13 | 14 | before_script: 15 | - apk update && apk add brotli curl file grep jq zstd 16 | 17 | script: 18 | - sh src/script.sh 19 | - find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec gzip -f -k -9 {} \; 20 | - find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec brotli -f -k -9 {} \; 21 | - find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec zstd -f -k -9 {} \; 22 | 23 | artifacts: 24 | paths: 25 | - tmp 26 | - public 27 | expire_in: 1 week 28 | 29 | pages: 30 | stage: deploy 31 | 32 | dependencies: 33 | - build_job 34 | 35 | script: 36 | - echo 37 | 38 | artifacts: 39 | paths: 40 | - public 41 | expire_in: 1 week 42 | 43 | cloudflare: 44 | stage: deploy 45 | 46 | before_script: 47 | - apk update && apk add curl 48 | 49 | script: 50 | - curl -X POST "https://api.cloudflare.com/client/v4/pages/webhooks/deploy_hooks/$CLOUDFLARE_BUILD_HOOK" 51 | 52 | rules: 53 | - if: $CLOUDFLARE_BUILD_HOOK 54 | 55 | netlify: 56 | stage: deploy 57 | 58 | dependencies: 59 | - build_job 60 | 61 | before_script: 62 | - npm install netlify-cli -g 63 | - netlify --telemetry-disable 64 | 65 | script: 66 | - netlify deploy --dir=public --prod 67 | 68 | cache: 69 | paths: 70 | - node_modules/ 71 | 72 | rules: 73 | - if: $NETLIFY_SITE_ID 74 | -------------------------------------------------------------------------------- /src/build.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | // for deployment outside of GitLab CI, e.g. 
Cloudflare Pages and Netlify 4 | 5 | import { Extract } from 'unzipper' 6 | import { dirname, join } from 'node:path' 7 | import { mkdir } from 'node:fs/promises' 8 | import { pipeline } from 'node:stream/promises' 9 | import { fileURLToPath } from 'node:url' 10 | import { Readable } from 'node:stream' 11 | 12 | const __dirname = dirname(fileURLToPath(import.meta.url)) 13 | const rootPath = join(__dirname, '..') 14 | const publicPath = join(rootPath, 'public') 15 | const artifactsUrl = 'https://gitlab.com/malware-filter/urlhaus-filter/-/jobs/artifacts/main/download?job=pages' 16 | const pipelineUrl = 'https://gitlab.com/malware-filter/urlhaus-filter/badges/main/pipeline.svg' 17 | const ghMirror = 'https://nightly.link/curbengh/urlhaus-filter/workflows/pages/main/public.zip' 18 | 19 | const pipelineStatus = async (url) => { 20 | console.log(`Checking pipeline from "${url}"`) 21 | try { 22 | const svg = await (await fetch(url)).text() 23 | if (svg.includes('failed')) throw new Error('last gitlab pipeline failed') 24 | } catch ({ message }) { 25 | throw new Error(message) 26 | } 27 | } 28 | 29 | const f = async () => { 30 | console.log(`Downloading artifacts.zip from "${artifactsUrl}"`) 31 | try { 32 | await pipeline( 33 | Readable.fromWeb((await fetch(artifactsUrl)).body), 34 | Extract({ path: rootPath }) 35 | ) 36 | await pipelineStatus(pipelineUrl) 37 | } catch ({ message }) { 38 | console.error(JSON.stringify({ 39 | error: message, 40 | link: artifactsUrl 41 | })) 42 | 43 | console.log(`Downloading artifacts.zip from "${ghMirror}"`) 44 | 45 | await mkdir(publicPath, { recursive: true }) 46 | 47 | try { 48 | await pipeline( 49 | Readable.fromWeb((await fetch(ghMirror)).body), 50 | Extract({ path: publicPath }) 51 | ) 52 | } catch ({ message }) { 53 | throw new Error(JSON.stringify({ 54 | error: message, 55 | link: ghMirror 56 | })) 57 | } 58 | } 59 | } 60 | 61 | f() 62 | -------------------------------------------------------------------------------- /.github/workflows/pages.yml: -------------------------------------------------------------------------------- 1 | name: Pages 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0,12 * * *" 6 | workflow_dispatch: 7 | 8 | jobs: 9 | pages: 10 | runs-on: ubuntu-latest 11 | container: node:lts-alpine 12 | steps: 13 | - uses: actions/checkout@v4 14 | - name: Install Dependencies 15 | run: | 16 | apk update 17 | apk add brotli curl file git grep jq zstd 18 | - name: Build 19 | env: 20 | CF_API: ${{ secrets.CF_API }} 21 | run: sh ./src/script.sh 22 | - name: Compress 23 | run: | 24 | find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec gzip -f -k -9 {} \; 25 | find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec brotli -f -k -9 {} \; 26 | find public -type f -regex '.*\.\(txt\|conf\|tpl\|rules\|csv\)$' -exec zstd -f -k -9 {} \; 27 | - name: Deploy 28 | uses: peaceiris/actions-gh-pages@v4 29 | with: 30 | github_token: ${{ secrets.GITHUB_TOKEN }} 31 | publish_dir: ./public 32 | force_orphan: true 33 | - name: "Upload Public Folder" 34 | uses: actions/upload-artifact@v4 35 | with: 36 | name: public 37 | path: ./public 38 | retention-days: 30 39 | - name: "Upload Tmp Folder" 40 | uses: actions/upload-artifact@v4 41 | with: 42 | name: tmp 43 | path: ./tmp 44 | retention-days: 30 45 | - name: Check GitLab Status 46 | env: 47 | GITHUB_ENV: ${{ env.GITHUB_ENV }} 48 | run: sh ./src/gitlab_status.sh 49 | - name: Cloudflare Pages 50 | env: 51 | CLOUDFLARE_BUILD_HOOK: ${{ secrets.CLOUDFLARE_BUILD_HOOK }} 52 | if: ${{ 
env.CLOUDFLARE_BUILD_HOOK != 0 && env.GITLAB_STATUS == 'down' }} 53 | run: curl -X POST "https://api.cloudflare.com/client/v4/pages/webhooks/deploy_hooks/${{ env.CLOUDFLARE_BUILD_HOOK }}" 54 | - name: Netlify 55 | env: 56 | NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }} 57 | NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }} 58 | if: ${{ env.NETLIFY_SITE_ID != 0 && env.GITLAB_STATUS == 'down' }} 59 | run: | 60 | npm install netlify-cli -g 61 | netlify --telemetry-disable 62 | netlify deploy --dir=public --prod 63 | -------------------------------------------------------------------------------- /src/exclude.txt: -------------------------------------------------------------------------------- 1 | # Domain exclusion list 2 | # Domains listed here will be excluded from domain-based filters only, not URL-based filters 3 | void.cat 4 | pdesaa.cimaa.pt 5 | users.telenet.be 6 | u.teknik.io 7 | digitalschnitt.de 8 | cd.textfiles.com 9 | s3.amazonaws.com 10 | s3.us-east-2.amazonaws.com 11 | s3.us-east-1.amazonaws.com 12 | s3.us-west-1.amazonaws.com 13 | s3.us-west-2.amazonaws.com 14 | s3.af-south-1.amazonaws.com 15 | s3.ap-east-1.amazonaws.com 16 | s3.ap-south-2.amazonaws.com 17 | s3.ap-southeast-3.amazonaws.com 18 | s3.ap-southeast-4.amazonaws.com 19 | s3.ap-south-1.amazonaws.com 20 | s3.ap-northeast-3.amazonaws.com 21 | s3.ap-northeast-2.amazonaws.com 22 | s3.ap-southeast-1.amazonaws.com 23 | s3.ap-southeast-2.amazonaws.com 24 | s3.ap-northeast-1.amazonaws.com 25 | s3.ca-central-1.amazonaws.com 26 | s3.eu-central-1.amazonaws.com 27 | s3.eu-west-1.amazonaws.com 28 | s3.eu-west-2.amazonaws.com 29 | s3.eu-south-1.amazonaws.com 30 | s3.eu-west-3.amazonaws.com 31 | s3.eu-south-2.amazonaws.com 32 | s3.eu-north-1.amazonaws.com 33 | s3.eu-central-2.amazonaws.com 34 | s3.me-south-1.amazonaws.com 35 | s3.me-central-1.amazonaws.com 36 | s3.sa-east-1.amazonaws.com 37 | s3.us-gov-east-1.amazonaws.com 38 | s3.us-gov-west-1.amazonaws.com 39 | s3.cn-north-1.amazonaws.com.cn 40 | s3.cn-northwest-1.amazonaws.com.cn 41 | s3-us-east-2.amazonaws.com 42 | s3-us-east-1.amazonaws.com 43 | s3-us-west-1.amazonaws.com 44 | s3-us-west-2.amazonaws.com 45 | s3-af-south-1.amazonaws.com 46 | s3-ap-east-1.amazonaws.com 47 | s3-ap-south-1.amazonaws.com 48 | s3-ap-northeast-3.amazonaws.com 49 | s3-ap-northeast-2.amazonaws.com 50 | s3-ap-southeast-1.amazonaws.com 51 | s3-ap-southeast-2.amazonaws.com 52 | s3-ap-northeast-1.amazonaws.com 53 | s3-ca-central-1.amazonaws.com 54 | s3-cn-north-1.amazonaws.com.cn 55 | s3-cn-northwest-1.amazonaws.com.cn 56 | s3-eu-central-1.amazonaws.com 57 | s3-eu-west-1.amazonaws.com 58 | s3-eu-west-2.amazonaws.com 59 | s3-eu-south-1.amazonaws.com 60 | s3-eu-west-3.amazonaws.com 61 | s3-eu-north-1.amazonaws.com 62 | s3-sa-east-1.amazonaws.com 63 | s3-me-south-1.amazonaws.com 64 | s3-us-gov-east-1.amazonaws.com 65 | s3-us-gov-west-1.amazonaws.com 66 | dl.packetstormsecurity.net 67 | cfs5.tistory.com 68 | litter.catbox.moe 69 | link.storjshare.io 70 | storage.bunnycdn.com 71 | eu2.contabostorage.com 72 | sin1.contabostorage.com 73 | usc1.contabostorage.com 74 | res.cloudinary.com 75 | landley.net 76 | r2.e-z.host 77 | a.uguu.se 78 | f.uguu.se 79 | i.uguu.se 80 | x0.at 81 | -------------------------------------------------------------------------------- /src/ids.js: -------------------------------------------------------------------------------- 1 | import { createWriteStream } from 'node:fs' 2 | import { open } from 'node:fs/promises' 3 | 4 | const domains = await open('malware-domains-online.txt') 5 | 
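// Note (added for reference): both inputs below are produced in tmp/ by src/script.sh
// before this script runs, one entry per line:
// - malware-domains-online.txt: online malicious hosts (popular top-1M domains excluded)
// - malware-url-top-domains-raw-online.txt: online malicious URLs hosted on popular domains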
const urls = await open('malware-url-top-domains-raw-online.txt') 6 | 7 | const snort2 = createWriteStream('../public/urlhaus-filter-snort2-online.rules', { 8 | encoding: 'utf8', 9 | flags: 'a' 10 | }) 11 | const snort3 = createWriteStream('../public/urlhaus-filter-snort3-online.rules', { 12 | encoding: 'utf8', 13 | flags: 'a' 14 | }) 15 | const suricata = createWriteStream('../public/urlhaus-filter-suricata-online.rules', { 16 | encoding: 'utf8', 17 | flags: 'a' 18 | }) 19 | const suricataSni = createWriteStream('../public/urlhaus-filter-suricata-sni-online.rules', { 20 | encoding: 'utf8', 21 | flags: 'a' 22 | }) 23 | const splunk = createWriteStream('../public/urlhaus-filter-splunk-online.csv', { 24 | encoding: 'utf8', 25 | flags: 'a' 26 | }) 27 | 28 | let sid = 100000001 29 | 30 | for await (const domain of domains.readLines()) { 31 | snort2.write(`alert tcp $HOME_NET any -> $EXTERNAL_NET [80,443] (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; content:"GET"; http_method; content:"${domain}"; content:"Host"; http_header; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 32 | snort3.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; http_header:field host; content:"${domain}",nocase; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 33 | suricata.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; http.method; content:"GET"; http.host; content:"${domain}"; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 34 | suricataSni.write(`alert tls $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; tls.sni; bsize:${domain.length}; content:"${domain}"; fast_pattern; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 35 | splunk.write(`"${domain}","","urlhaus-filter malicious website detected","${process.env.CURRENT_TIME}"\n`) 36 | 37 | sid++ 38 | } 39 | 40 | suricataSni.close() 41 | 42 | for await (const line of urls.readLines()) { 43 | if (!URL.canParse(`http://${line}`)) { 44 | console.error(`Invalid URL: ${line}`) 45 | continue 46 | } 47 | 48 | const url = new URL(`http://${line}`) 49 | const { hostname, pathname, search } = url 50 | const pathEscape = `${pathname}${search}`.replaceAll(';', '\\;') 51 | const path = pathname + search 52 | 53 | snort2.write(`alert tcp $HOME_NET any -> $EXTERNAL_NET [80,443] (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; content:"GET"; http_method; content:"${pathEscape.substring(0, 2048)}"; http_uri; nocase; content:"${hostname}"; content:"Host"; http_header; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 54 | snort3.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; http_header:field host; content:"${hostname}",nocase; http_uri; content:"${pathEscape}",nocase; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 55 | suricata.write(`alert http $HOME_NET any -> $EXTERNAL_NET any (msg:"urlhaus-filter malicious website detected"; flow:established,from_client; http.method; content:"GET"; http.uri; content:"${pathEscape}"; endswith; nocase; http.host; content:"${hostname}"; classtype:trojan-activity; sid:${sid}; rev:1;)\n`) 56 | splunk.write(`"${hostname}","${path}","urlhaus-filter malicious website detected","${process.env.CURRENT_TIME}"\n`) 57 | 58 | sid++ 59 | } 60 | 61 | snort2.close() 62 | snort3.close() 63 | suricata.close() 64 | splunk.close() 65 |
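// Note (added for reference): the .rules/.csv outputs are written without a header here;
// src/script.sh prepends the list header afterwards via `sed -i "1i $COMMENT_ONLINE"`.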
-------------------------------------------------------------------------------- /src/clean_url.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | import { createInterface } from 'node:readline' 4 | 5 | const cleanHost = (hostname) => { 6 | return hostname 7 | // Remove invalid protocol, see #32 8 | .replace(/^(https?:\/\/)(?:ttps:\/\/|https:\/|http\/)/, '$1') 9 | .replace(/^(https?:\/\/)?www\./, '$1') 10 | } 11 | 12 | // nodejs does not percent-encode ^ yet 13 | // https://github.com/nodejs/node/issues/57313 14 | // Applies to path, exclude query string 15 | const caretPath = (pathname) => { 16 | if (!pathname.includes('?')) return pathname.replaceAll('^', '%5E') 17 | 18 | const pathArray = pathname.split('?') 19 | const path = pathArray[0].replaceAll('^', '%5E') 20 | const search = pathArray.slice(1).join('?') 21 | 22 | return `${path}?${search}` 23 | } 24 | 25 | const safeLinks = [ 26 | 'safelinks\\.protection\\.outlook\\.com', 27 | '\\.protection\\.sophos\\.com', 28 | 'linkprotect\\.cudasvc\\.com', 29 | 'ctp\\.trendmicro\\.com', 30 | 'urlsand\\.esvalabs\\.com' 31 | ] 32 | 33 | const deSafelink = (urlStr) => { 34 | let url = new URL(urlStr) 35 | 36 | // O365 Safelinks & Trendmicro 37 | if (url.hostname.endsWith('safelinks.protection.outlook.com') || url.hostname.endsWith('ctp.trendmicro.com')) { 38 | url = new URL(url.searchParams.get('url')) 39 | } 40 | 41 | // Sophos 42 | if (url.hostname.endsWith('.protection.sophos.com')) { 43 | url = new URL(`http://${url.searchParams.get('d')}`) 44 | } 45 | 46 | // Barracuda 47 | if (url.hostname.endsWith('linkprotect.cudasvc.com')) { 48 | url = new URL(url.searchParams.get('a')) 49 | } 50 | 51 | // ShopMy & Disqus 52 | if ((url.hostname === 'api.shopmy.us' && url.pathname === '/api/redirect_click') || url.hostname === 'disq.us') { 53 | url = new URL(url.searchParams.get('url')) 54 | } 55 | 56 | // VKontakte 57 | if ((url.hostname === 'vk.com' || url.hostname === 'vkontakte.ru') && url.pathname === '/away.php') { 58 | url = new URL(url.searchParams.get('to')) 59 | } 60 | 61 | // WhatsApp, Esvalabs 62 | if ((url.hostname === 'l.wl.co' && url.pathname === '/l') || url.hostname === 'urlsand.esvalabs.com') { 63 | url = new URL(url.searchParams.get('u')) 64 | } 65 | 66 | // Google Ads 67 | if (url.hostname.endsWith('doubleclick.net') || url.hostname.endsWith('googleadservices.com')) { 68 | let paramUrl = url.searchParams.getAll('adurl').at(-1) || url.searchParams.getAll('url').at(-1) || url.searchParams.getAll('ds_dest_url').at(-1) 69 | if (paramUrl) { 70 | paramUrl = paramUrl.replace(/^\/\//, 'https://') 71 | url = new URL(paramUrl) 72 | } 73 | } 74 | 75 | // Google Search 76 | // Google AMP does not redirect (e.g. 
google.com/amp/example.com) 77 | if (url.hostname.endsWith('google.com') && (url.pathname.startsWith('/url') || url.pathname.startsWith('/travel/clk'))) { 78 | const paramUrl = url.searchParams.get('q') || url.searchParams.get('url') || url.searchParams.get('pcurl') 79 | if (paramUrl) url = new URL(paramUrl) 80 | } 81 | 82 | // SES 83 | // https://github.com/uBlockOrigin/uAssets/blob/42e518277ab0c36d4b131aa01b4a8828af4e18b6/filters/privacy.txt#L866 84 | if (url.hostname.endsWith('awstrack.me') && url.pathname.startsWith('/L0')) { 85 | url = new URL(decodeURIComponent(url.pathname.match(/\/L0\/(http[^\/?#]+)/)[1])) 86 | } 87 | 88 | // DuckDuckGo 89 | if (url.hostname === 'duckduckgo.com' && url.pathname === '/l/') { 90 | url = new URL(url.searchParams.get('uddg')) 91 | } 92 | 93 | // Calendly 94 | if (url.hostname === 'calendly.com' && url.pathname === '/url') { 95 | url = new URL(url.searchParams.get('q')) 96 | } 97 | 98 | if (url.hostname.match(new RegExp(safeLinks.join('|')))) { 99 | return deSafelink(url.href) 100 | } 101 | 102 | return url.href 103 | } 104 | 105 | for await (const line of createInterface({ input: process.stdin, terminal: false })) { 106 | // parse hostname from url 107 | if (process.argv[2] === 'hostname') { 108 | if (URL.canParse(`http://${line}`)) { 109 | const url = new URL(`http://${line}`) 110 | 111 | console.log(url.hostname) 112 | } else { 113 | const hostname = line 114 | // host 115 | .split('/')[0] 116 | // exclude credential 117 | .replace(/.*@(.+)/, '$1') 118 | // exclude port 119 | .replace(/:\d+$/, '') 120 | // #2 121 | .split('?')[0] 122 | 123 | console.log(hostname) 124 | } 125 | } else { 126 | // Skip invalid domains, see #15 127 | if (line.split('/')[2].includes('??')) continue 128 | 129 | if (URL.canParse(line)) { 130 | const url = new URL(deSafelink(cleanHost(line))) 131 | 132 | url.host = cleanHost(url.host) 133 | 134 | // nodejs does not percent-encode ^ yet 135 | // https://github.com/nodejs/node/issues/57313 136 | url.pathname = caretPath(url.pathname) 137 | const outUrl = `${url.host}${url.pathname}${url.search}` 138 | // remove trailing slash from domain except path 139 | .replace(/(^[^/]*)\/+$/, '$1') 140 | 141 | console.log(outUrl) 142 | } else { 143 | const outUrl = caretPath(cleanHost(line 144 | // remove protocol 145 | .split('/').slice(2).join('/'))) 146 | // url encode space 147 | .replaceAll(' ', '%20') 148 | .replace(/(^[^/]*)\/+$/, '$1') 149 | 150 | console.log(outUrl) 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /LICENSE-CC0.md: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | ================== 3 | 4 | Statement of Purpose 5 | --------------------- 6 | 7 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 8 | 9 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 10 | 11 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 12 | 13 | 1. Copyright and Related Rights. 14 | -------------------------------- 15 | A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: 16 | 17 | i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 18 | ii. moral rights retained by the original author(s) and/or performer(s); 19 | iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 20 | iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 21 | v. rights protecting the extraction, dissemination, use and reuse of data in a Work; 22 | vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 23 | vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 24 | 25 | 2. Waiver. 26 | ----------- 27 | To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 28 | 29 | 3. Public License Fallback. 30 | ---------------------------- 31 | Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 32 | 33 | 4. Limitations and Disclaimers. 34 | -------------------------------- 35 | 36 | a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 37 | b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 38 | c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 39 | d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 40 | 41 | For more information, please see 42 | https://creativecommons.org/publicdomain/zero/1.0/ -------------------------------------------------------------------------------- /src/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if ! (set -o pipefail 2>/dev/null); then 4 | # dash does not support pipefail 5 | set -efx 6 | else 7 | set -efx -o pipefail 8 | fi 9 | 10 | # bash does not expand alias by default for non-interactive script 11 | if [ -n "$BASH_VERSION" ]; then 12 | shopt -s expand_aliases 13 | fi 14 | 15 | alias curl="curl -L" 16 | alias rm="rm -rf" 17 | 18 | ## Use GNU grep, busybox grep is not as performant 19 | DISTRO="" 20 | if [ -f "/etc/os-release" ]; then 21 | . 
"/etc/os-release" 22 | DISTRO="$ID" 23 | fi 24 | 25 | check_grep() { 26 | if [ -z "$(grep --help | grep 'GNU')" ]; then 27 | if [ -x "/usr/bin/grep" ]; then 28 | alias grep="/usr/bin/grep" 29 | check_grep 30 | else 31 | if [ "$DISTRO" = "alpine" ]; then 32 | echo "Please install GNU grep 'apk add grep'" 33 | else 34 | echo "GNU grep not found" 35 | fi 36 | exit 1 37 | fi 38 | fi 39 | } 40 | check_grep 41 | 42 | if ! command -v dos2unix &> /dev/null 43 | then 44 | if command -v busybox &> /dev/null 45 | then 46 | alias dos2unix="busybox dos2unix" 47 | else 48 | echo "dos2unix or busybox not found" 49 | exit 1 50 | fi 51 | fi 52 | 53 | if command -v unzip &> /dev/null 54 | then 55 | alias unzip="unzip -p" 56 | elif command -v busybox &> /dev/null 57 | then 58 | alias unzip="busybox unzip -p" 59 | elif command -v bsdunzip &> /dev/null 60 | then 61 | alias unzip="bsdunzip -p" 62 | else 63 | echo "unzip not found" 64 | exit 1 65 | fi 66 | 67 | ## Create a temporary working folder 68 | rm "tmp/" 69 | mkdir -p "tmp/" 70 | cd "tmp/" 71 | 72 | 73 | ## Prepare datasets 74 | curl "https://urlhaus.abuse.ch/downloads/csv/" -o "urlhaus.zip" 75 | curl "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" -o "top-1m-umbrella.zip" 76 | curl "https://tranco-list.eu/download/daily/top-1m.csv.zip" -o "top-1m-tranco.zip" 77 | 78 | ## Cloudflare Radar 79 | if [ -n "$CF_API" ]; then 80 | mkdir -p "cf/" 81 | # Get the latest domain ranking buckets 82 | curl -X GET "https://api.cloudflare.com/client/v4/radar/datasets?limit=5&offset=0&datasetType=RANKING_BUCKET&format=json" \ 83 | -H "Authorization: Bearer $CF_API" -o "cf/datasets.json" 84 | # Get the top 1m bucket's dataset ID 85 | DATASET_ID=$(jq ".result.datasets[] | select(.meta.top==1000000) | .id" "cf/datasets.json") 86 | # Get the dataset download url 87 | curl --request POST \ 88 | --url "https://api.cloudflare.com/client/v4/radar/datasets/download" \ 89 | --header "Content-Type: application/json" \ 90 | --header "Authorization: Bearer $CF_API" \ 91 | --data "{ \"datasetId\": $DATASET_ID }" \ 92 | -o "cf/dataset-url.json" 93 | DATASET_URL=$(jq ".result.dataset.url" "cf/dataset-url.json" | sed 's/"//g') 94 | curl "$DATASET_URL" -o "cf/top-1m-radar.csv" 95 | 96 | ## Parse the Radar 1 Million 97 | cat "cf/top-1m-radar.csv" | \ 98 | dos2unix | \ 99 | tr "[:upper:]" "[:lower:]" | \ 100 | grep -F "." | \ 101 | sed "s/^www\.//" | \ 102 | sort -u > "top-1m-radar.txt" 103 | fi 104 | 105 | cp "../src/exclude.txt" "." 106 | cp "../src/exclude-url.txt" "." 
107 | 108 | ## Prepare URLhaus.csv 109 | unzip "urlhaus.zip" | \ 110 | # Convert DOS to Unix line ending 111 | dos2unix | \ 112 | tr "[:upper:]" "[:lower:]" | \ 113 | # Remove comment 114 | sed "/^#/d" > "URLhaus.csv" 115 | 116 | ## Parse URLs 117 | cat "URLhaus.csv" | \ 118 | cut -f 6 -d '"' | \ 119 | node "../src/clean_url.js" | \ 120 | sort -u > "urlhaus.txt" 121 | 122 | ## Parse domain and IP address only 123 | cat "urlhaus.txt" | \ 124 | node "../src/clean_url.js" hostname | \ 125 | sort -u > "urlhaus-domains.txt" 126 | 127 | ## Parse online URLs only 128 | cat "URLhaus.csv" | \ 129 | grep -F '"online"' | \ 130 | cut -f 6 -d '"' | \ 131 | node "../src/clean_url.js" | \ 132 | sort -u > "urlhaus-online.txt" 133 | 134 | cat "urlhaus-online.txt" | \ 135 | node "../src/clean_url.js" hostname | \ 136 | sort -u > "urlhaus-domains-online.txt" 137 | 138 | 139 | ## Parse the Umbrella 1 Million 140 | unzip "top-1m-umbrella.zip" | \ 141 | dos2unix | \ 142 | tr "[:upper:]" "[:lower:]" | \ 143 | # Parse domains only 144 | cut -f 2 -d "," | \ 145 | grep -F "." | \ 146 | # Remove www. 147 | sed "s/^www\.//" | \ 148 | sort -u > "top-1m-umbrella.txt" 149 | 150 | ## Parse the Tranco 1 Million 151 | if [ -n "$(file 'top-1m-tranco.zip' | grep 'Zip archive data')" ]; then 152 | unzip "top-1m-tranco.zip" | \ 153 | dos2unix | \ 154 | tr "[:upper:]" "[:lower:]" | \ 155 | # Parse domains only 156 | cut -f 2 -d "," | \ 157 | grep -F "." | \ 158 | # Remove www. 159 | sed "s/^www\.//" | \ 160 | sort -u > "top-1m-tranco.txt" 161 | else 162 | # tranco has unreliable download 163 | echo "top-1m-tranco.zip is not a zip, skipping it..." 164 | touch "top-1m-tranco.txt" 165 | fi 166 | 167 | # Merge Umbrella and self-maintained top domains 168 | cat "top-1m-umbrella.txt" "top-1m-tranco.txt" "exclude.txt" | \ 169 | sort -u > "top-1m-well-known.txt" 170 | 171 | if [ -n "$CF_API" ] && [ -f "top-1m-radar.txt" ]; then 172 | cat "top-1m-radar.txt" >> "top-1m-well-known.txt" 173 | # sort in-place 174 | sort "top-1m-well-known.txt" -u -o "top-1m-well-known.txt" 175 | fi 176 | 177 | 178 | cat "exclude-url.txt" | \ 179 | sed "/^#/d" | \ 180 | # "example.com/path" -> "^example\.com/path" 181 | # slash doesn't need to be escaped 182 | sed -e "s/^/^/" -e "s/\./\\\./g" > "exclude-url-grep.txt" 183 | 184 | ## Parse popular domains from URLhaus 185 | cat "urlhaus-domains.txt" | \ 186 | # grep match whole line 187 | grep -Fx -f "top-1m-well-known.txt" > "urlhaus-top-domains.txt" 188 | 189 | 190 | ## Parse domains from URLhaus excluding popular domains 191 | cat "urlhaus-domains.txt" | \ 192 | grep -F -vf "urlhaus-top-domains.txt" | \ 193 | # exclude domains from domains-based filters #110 194 | grep -vf "exclude-url-grep.txt" | \ 195 | # Remove blank lines 196 | sed "/^$/d" > "malware-domains.txt" 197 | 198 | cat "urlhaus-domains-online.txt" | \ 199 | grep -F -vf "urlhaus-top-domains.txt" | \ 200 | grep -vf "exclude-url-grep.txt" | \ 201 | sed "/^$/d" > "malware-domains-online.txt" 202 | 203 | ## Parse malware URLs from popular domains 204 | cat "urlhaus.txt" | \ 205 | grep -F -f "urlhaus-top-domains.txt" | \ 206 | # exclude domains/URLs from URL-based filters #110 207 | grep -vf "exclude-url-grep.txt" | \ 208 | sed "s/^/||/" | \ 209 | sed 's/$/^$all/' > "malware-url-top-domains.txt" 210 | 211 | cat "urlhaus-online.txt" | \ 212 | grep -F -f "urlhaus-top-domains.txt" | \ 213 | grep -vf "exclude-url-grep.txt" | \ 214 | sed "s/^/||/" | \ 215 | sed 's/$/^$all/' > "malware-url-top-domains-online.txt" 216 | 217 | cat "urlhaus-online.txt" | \ 
218 | grep -F -f "urlhaus-top-domains.txt" | \ 219 | grep -vf "exclude-url-grep.txt" > "malware-url-top-domains-raw-online.txt" 220 | 221 | 222 | ## Merge malware domains and URLs 223 | CURRENT_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") 224 | FIRST_LINE="! Title: Malicious URL Blocklist" 225 | SECOND_LINE="! Updated: $CURRENT_TIME" 226 | THIRD_LINE="! Expires: 12 hours (update frequency)" 227 | FOURTH_LINE="! Homepage: https://gitlab.com/malware-filter/urlhaus-filter" 228 | FIFTH_LINE="! License: https://gitlab.com/malware-filter/urlhaus-filter#license" 229 | SIXTH_LINE="! Source: https://urlhaus.abuse.ch/api/" 230 | COMMENT_ABP="$FIRST_LINE\n$SECOND_LINE\n$THIRD_LINE\n$FOURTH_LINE\n$FIFTH_LINE\n$SIXTH_LINE" 231 | 232 | mkdir -p "../public/" 233 | 234 | cat "malware-domains.txt" "malware-url-top-domains.txt" | \ 235 | sed "1i $COMMENT_ABP" > "../public/urlhaus-filter.txt" 236 | 237 | cat "malware-domains-online.txt" "malware-url-top-domains-online.txt" | \ 238 | sed "1i $COMMENT_ABP" | \ 239 | sed "1s/Malicious/Online Malicious/" > "../public/urlhaus-filter-online.txt" 240 | 241 | 242 | # Adguard Home (#19, #22) 243 | cat "malware-domains.txt" | \ 244 | sed "s/^/||/" | \ 245 | sed "s/$/^/" | \ 246 | sort -u > "malware-domains-adguard-home.txt" 247 | 248 | cat "malware-domains-online.txt" | \ 249 | sed "s/^/||/" | \ 250 | sed "s/$/^/" > "malware-domains-online-adguard-home.txt" 251 | 252 | cat "malware-domains-adguard-home.txt" | \ 253 | sed "1i $COMMENT_ABP" | \ 254 | sed "1s/Blocklist/Blocklist (AdGuard Home)/" > "../public/urlhaus-filter-agh.txt" 255 | 256 | cat "malware-domains-online-adguard-home.txt" | \ 257 | sed "1i $COMMENT_ABP" | \ 258 | sed "1s/Malicious/Online Malicious/" | \ 259 | sed "1s/Blocklist/Blocklist (AdGuard Home)/" > "../public/urlhaus-filter-agh-online.txt" 260 | 261 | 262 | # Adguard browser extension 263 | cat "malware-domains.txt" | \ 264 | sed "s/^/||/" | \ 265 | sed 's/$/^$all/' > "malware-domains-adguard.txt" 266 | 267 | cat "malware-domains-online.txt" | \ 268 | sed "s/^/||/" | \ 269 | sed 's/$/^$all/' > "malware-domains-online-adguard.txt" 270 | 271 | cat "malware-domains-adguard.txt" "malware-url-top-domains.txt" | \ 272 | sed "1i $COMMENT_ABP" | \ 273 | sed "1s/Blocklist/Blocklist (AdGuard)/" > "../public/urlhaus-filter-ag.txt" 274 | 275 | cat "malware-domains-online-adguard.txt" "malware-url-top-domains-online.txt" | \ 276 | sed "1i $COMMENT_ABP" | \ 277 | sed "1s/Malicious/Online Malicious/" | \ 278 | sed "1s/Blocklist/Blocklist (AdGuard)/" > "../public/urlhaus-filter-ag-online.txt" 279 | 280 | 281 | # Vivaldi 282 | cat "malware-domains.txt" | \ 283 | sed "s/^/||/" | \ 284 | sed 's/$/^$document/' > "malware-domains-vivaldi.txt" 285 | 286 | cat "malware-domains-online.txt" | \ 287 | sed "s/^/||/" | \ 288 | sed 's/$/^$document/' > "malware-domains-online-vivaldi.txt" 289 | 290 | cat "malware-domains-vivaldi.txt" "malware-url-top-domains.txt" | \ 291 | sed 's/\$all$/$document/' | \ 292 | sed "1i $COMMENT_ABP" | \ 293 | sed "1s/Blocklist/Blocklist (Vivaldi)/" > "../public/urlhaus-filter-vivaldi.txt" 294 | 295 | cat "malware-domains-online-vivaldi.txt" "malware-url-top-domains-online.txt" | \ 296 | sed 's/\$all$/$document/' | \ 297 | sed "1i $COMMENT_ABP" | \ 298 | sed "1s/Malicious/Online Malicious/" | \ 299 | sed "1s/Blocklist/Blocklist (Vivaldi)/" > "../public/urlhaus-filter-vivaldi-online.txt" 300 | 301 | 302 | ## Domains-only blocklist 303 | # awk + head is a workaround for sed prepend 304 | COMMENT=$(printf "$COMMENT_ABP" | sed "s/^!/#/" | sed 
"1s/URL/Domains/" | awk '{printf "%s\\n", $0}' | head -c -2) 305 | COMMENT_ONLINE=$(printf "$COMMENT" | sed "1s/Malicious/Online Malicious/" | awk '{printf "%s\\n", $0}' | head -c -2) 306 | 307 | cat "malware-domains.txt" | \ 308 | # remove IPv6 bracket 309 | sed -r "s/\[|\]//g" | \ 310 | sed "1i $COMMENT" > "../public/urlhaus-filter-domains.txt" 311 | 312 | cat "malware-domains-online.txt" | \ 313 | sed -r "s/\[|\]//g" | \ 314 | sed "1i $COMMENT_ONLINE" > "../public/urlhaus-filter-domains-online.txt" 315 | 316 | 317 | ## Hosts only 318 | cat "malware-domains.txt" | \ 319 | # exclude IPv4 320 | grep -vE "^([0-9]{1,3}[\.]){3}[0-9]{1,3}$" | \ 321 | # exclude IPv6 322 | grep -vE "^\[" > "malware-hosts.txt" 323 | 324 | cat "malware-domains-online.txt" | \ 325 | grep -vE "^([0-9]{1,3}[\.]){3}[0-9]{1,3}$" | \ 326 | grep -vE "^\[" > "malware-hosts-online.txt" 327 | 328 | 329 | ## Hosts file blocklist 330 | cat "malware-hosts.txt" | \ 331 | sed "s/^/0.0.0.0 /" | \ 332 | # Re-insert comment 333 | sed "1i $COMMENT" | \ 334 | sed "1s/Domains/Hosts/" > "../public/urlhaus-filter-hosts.txt" 335 | 336 | cat "malware-hosts-online.txt" | \ 337 | sed "s/^/0.0.0.0 /" | \ 338 | sed "1i $COMMENT_ONLINE" | \ 339 | sed "1s/Domains/Hosts/" > "../public/urlhaus-filter-hosts-online.txt" 340 | 341 | 342 | ## Dnsmasq-compatible blocklist 343 | cat "malware-hosts.txt" | \ 344 | sed "s/^/address=\//" | \ 345 | sed "s/$/\/0.0.0.0/" | \ 346 | sed "1i $COMMENT" | \ 347 | sed "1s/Blocklist/dnsmasq Blocklist/" > "../public/urlhaus-filter-dnsmasq.conf" 348 | 349 | cat "malware-hosts-online.txt" | \ 350 | sed "s/^/address=\//" | \ 351 | sed "s/$/\/0.0.0.0/" | \ 352 | sed "1i $COMMENT_ONLINE" | \ 353 | sed "1s/Blocklist/dnsmasq Blocklist/" > "../public/urlhaus-filter-dnsmasq-online.conf" 354 | 355 | 356 | ## BIND-compatible blocklist 357 | cat "malware-hosts.txt" | \ 358 | sed 's/^/zone "/' | \ 359 | sed 's/$/" { type master; notify no; file "null.zone.file"; };/' | \ 360 | sed "1i $COMMENT" | \ 361 | sed "1s/Blocklist/BIND Blocklist/" > "../public/urlhaus-filter-bind.conf" 362 | 363 | cat "malware-hosts-online.txt" | \ 364 | sed 's/^/zone "/' | \ 365 | sed 's/$/" { type master; notify no; file "null.zone.file"; };/' | \ 366 | sed "1i $COMMENT_ONLINE" | \ 367 | sed "1s/Blocklist/BIND Blocklist/" > "../public/urlhaus-filter-bind-online.conf" 368 | 369 | 370 | ## DNS Response Policy Zone (RPZ) 371 | CURRENT_UNIX_TIME="$(date +%s)" 372 | RPZ_SYNTAX="\n\$TTL 30\n@ IN SOA localhost. root.localhost. 
$CURRENT_UNIX_TIME 86400 3600 604800 30\n NS localhost.\n" 373 | 374 | cat "malware-hosts.txt" | \ 375 | sed "s/$/ CNAME ./" | \ 376 | sed '1 i\'"$RPZ_SYNTAX"'' | \ 377 | sed "1i $COMMENT" | \ 378 | sed "s/^#/;/" | \ 379 | sed "1s/Blocklist/RPZ Blocklist/" > "../public/urlhaus-filter-rpz.conf" 380 | 381 | cat "malware-hosts-online.txt" | \ 382 | sed "s/$/ CNAME ./" | \ 383 | sed '1 i\'"$RPZ_SYNTAX"'' | \ 384 | sed "1i $COMMENT_ONLINE" | \ 385 | sed "s/^#/;/" | \ 386 | sed "1s/Blocklist/RPZ Blocklist/" > "../public/urlhaus-filter-rpz-online.conf" 387 | 388 | 389 | ## Unbound-compatible blocklist 390 | cat "malware-hosts.txt" | \ 391 | sed 's/^/local-zone: "/' | \ 392 | sed 's/$/" always_nxdomain/' | \ 393 | sed "1i $COMMENT" | \ 394 | sed "1s/Blocklist/Unbound Blocklist/" > "../public/urlhaus-filter-unbound.conf" 395 | 396 | cat "malware-hosts-online.txt" | \ 397 | sed 's/^/local-zone: "/' | \ 398 | sed 's/$/" always_nxdomain/' | \ 399 | sed "1i $COMMENT_ONLINE" | \ 400 | sed "1s/Blocklist/Unbound Blocklist/" > "../public/urlhaus-filter-unbound-online.conf" 401 | 402 | 403 | ## dnscrypt-proxy blocklists 404 | # name-based 405 | cat "malware-hosts.txt" | \ 406 | sed "1i $COMMENT" | \ 407 | sed "1s/Domains/Names/" > "../public/urlhaus-filter-dnscrypt-blocked-names.txt" 408 | 409 | cat "malware-hosts-online.txt" | \ 410 | sed "1i $COMMENT_ONLINE" | \ 411 | sed "1s/Domains/Names/" > "../public/urlhaus-filter-dnscrypt-blocked-names-online.txt" 412 | 413 | # IPv4/6 414 | if grep -Eq "^(([0-9]{1,3}[\.]){3}[0-9]{1,3}$|\[)" "malware-domains.txt"; then 415 | cat "malware-domains.txt" | \ 416 | grep -E "^(([0-9]{1,3}[\.]){3}[0-9]{1,3}$|\[)" | \ 417 | sed -r "s/\[|\]//g" | \ 418 | sed "1i $COMMENT" | \ 419 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips.txt" 420 | 421 | cat "malware-domains-online.txt" | \ 422 | grep -E "^(([0-9]{1,3}[\.]){3}[0-9]{1,3}$|\[)" | \ 423 | sed -r "s/\[|\]//g" | \ 424 | sed "1i $COMMENT_ONLINE" | \ 425 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips-online.txt" 426 | else 427 | echo | \ 428 | sed "1i $COMMENT" | \ 429 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips.txt" 430 | 431 | echo | \ 432 | sed "1i $COMMENT_ONLINE" | \ 433 | sed "1s/Domains/IPs/" > "../public/urlhaus-filter-dnscrypt-blocked-ips-online.txt" 434 | fi 435 | 436 | ## Wildcard subdomain 437 | cat "malware-domains.txt" | \ 438 | sed "s/^/*./" | \ 439 | sed "1i $COMMENT" | \ 440 | sed "1s/Blocklist/Wildcard Asterisk Blocklist/" > "../public/urlhaus-filter-wildcard.txt" 441 | 442 | cat "malware-domains-online.txt" | \ 443 | sed "s/^/*./" | \ 444 | sed "1i $COMMENT_ONLINE" | \ 445 | sed "1s/Blocklist/Wildcard Asterisk Blocklist/" > "../public/urlhaus-filter-wildcard-online.txt" 446 | 447 | 448 | # Snort, Suricata, Splunk 449 | rm "../public/urlhaus-filter-snort2-online.rules" \ 450 | "../public/urlhaus-filter-snort3-online.rules" \ 451 | "../public/urlhaus-filter-suricata-online.rules" \ 452 | "../public/urlhaus-filter-suricata-sni-online.rules" \ 453 | "../public/urlhaus-filter-splunk-online.csv" 454 | 455 | export CURRENT_TIME 456 | node "../src/ids.js" 457 | 458 | sed -i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-snort2-online.rules" 459 | sed -i "1s/Domains Blocklist/URL Snort2 Ruleset/" "../public/urlhaus-filter-snort2-online.rules" 460 | 461 | sed -i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-snort3-online.rules" 462 | sed -i "1s/Domains Blocklist/URL Snort3 Ruleset/" "../public/urlhaus-filter-snort3-online.rules" 463 | 464 | sed
-i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-suricata-online.rules" 465 | sed -i "1s/Domains Blocklist/URL Suricata Ruleset/" "../public/urlhaus-filter-suricata-online.rules" 466 | 467 | sed -i "1i $COMMENT_ONLINE" "../public/urlhaus-filter-suricata-sni-online.rules" 468 | sed -i "1s/Domains Blocklist/Domains Suricata Ruleset (SNI)/" "../public/urlhaus-filter-suricata-sni-online.rules" 469 | 470 | sed -i -e "1i $COMMENT_ONLINE" -e '1i "host","path","message","updated"' "../public/urlhaus-filter-splunk-online.csv" 471 | sed -i "1s/Domains Blocklist/URL Splunk Lookup/" "../public/urlhaus-filter-splunk-online.csv" 472 | 473 | 474 | ## IE blocklist 475 | COMMENT_IE="msFilterList\n$COMMENT\n: Expires=1\n#" 476 | COMMENT_ONLINE_IE="msFilterList\n$COMMENT_ONLINE\n: Expires=1\n#" 477 | 478 | cat "malware-domains.txt" | \ 479 | sed -r "s/\[|\]//g" | \ 480 | sed "s/^/-d /" | \ 481 | sed "1i $COMMENT_IE" | \ 482 | sed "2s/Domains Blocklist/Hosts Blocklist (IE)/" > "../public/urlhaus-filter.tpl" 483 | 484 | cat "malware-domains-online.txt" | \ 485 | sed -r "s/\[|\]//g" | \ 486 | sed "s/^/-d /" | \ 487 | sed "1i $COMMENT_ONLINE_IE" | \ 488 | sed "2s/Domains Blocklist/Hosts Blocklist (IE)/" > "../public/urlhaus-filter-online.tpl" 489 | 490 | 491 | ## Clean up artifacts 492 | rm "URLhaus.csv" "top-1m-umbrella.zip" "top-1m-umbrella.txt" "top-1m-tranco.txt" "cf/" "top-1m-radar.txt" 493 | 494 | 495 | cd ../ 496 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Malicious URL Blocklist 2 | 3 | - [Lite version](#lite-version-online-links-only) 4 | - [Full version](#full-version) 5 | - Formats 6 | - [URL-based](#url-based) 7 | - [Domain-based](#domain-based) 8 | - [Wildcard asterisk](#wildcard-asterisk) 9 | - [Hosts-based](#hosts-based) 10 | - [Domain-based (AdGuard Home)](#domain-based-adguard-home) 11 | - [URL-based (AdGuard)](#url-based-adguard) 12 | - [URL-based (Vivaldi)](#url-based-vivaldi) 13 | - [Dnsmasq](#dnsmasq) 14 | - [BIND zone](#bind) 15 | - [RPZ](#response-policy-zone) 16 | - [Unbound](#unbound) 17 | - [dnscrypt-proxy](#dnscrypt-proxy) 18 | - [Snort2](#snort2) 19 | - [Snort3](#snort3) 20 | - [Suricata](#suricata) 21 | - [Suricata (SNI)](#suricata-sni) 22 | - [Splunk](#splunk) 23 | - [Tracking Protection List (IE)](#tracking-protection-list-ie) 24 | - [Compressed version](#compressed-version) 25 | - [Reporting issues](#issues) 26 | - [Cloning](#cloning) 27 | - [FAQ and Guides](#faq-and-guides) 28 | - [CI Variables](#ci-variables) 29 | - [License](#license) 30 | 31 | A blocklist of malicious websites that are being used for malware distribution, based on the **Database dump (CSV)** of Abuse.ch [URLhaus](https://urlhaus.abuse.ch/). Blocklist is updated twice a day. 32 | 33 | ## Lite version (online links only) 34 | 35 | Online status of URLs is checked by URLhaus. 
36 | 37 | | Client | mirror 1 | mirror 2 | mirror 3 | mirror 4 | mirror 5 | mirror 6 | 38 | | --- | --- | --- | --- | --- | --- | --- | 39 | | [uBlock Origin](#url-based) ([*](#youtube-compatibility)) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-online.txt) | 40 | | [AdGuard Home/Pi-hole](#domain-based-adguard-home) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-agh-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-agh-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-agh-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-agh-online.txt) | 41 | | [AdGuard (browser extension)](#url-based-adguard) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-ag-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-ag-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-ag-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-ag-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-ag-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-ag-online.txt) | 42 | | [Vivaldi/Brave](#url-based-vivaldi) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-vivaldi-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-vivaldi-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-vivaldi-online.txt) | 43 | | [Hosts](#hosts-based) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-hosts-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-hosts-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-hosts-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-hosts-online.txt) | 44 | | [Dnsmasq](#dnsmasq) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnsmasq-online.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-dnsmasq-online.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-dnsmasq-online.conf) | 45 | | BIND [zone](#bind) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-bind-online.conf) | 
[link](https://curbengh.github.io/malware-filter/urlhaus-filter-bind-online.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-bind-online.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-bind-online.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-bind-online.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-bind-online.conf) | 46 | | BIND [RPZ](#response-policy-zone) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-rpz-online.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-rpz-online.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-rpz-online.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-rpz-online.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-rpz-online.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-rpz-online.conf) | 47 | | [dnscrypt-proxy](#dnscrypt-proxy) | [names-online.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | [names-online.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names-online.txt), [ips-online.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips-online.txt) | 48 | | [blocky](#wildcard-asterisk) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-wildcard-online.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-wildcard-online.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-wildcard-online.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-wildcard-online.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-wildcard-online.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-wildcard-online.txt) | 49 | | [Snort2](#snort2) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-snort2-online.rules) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-snort2-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-snort2-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-snort2-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-snort2-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-snort2-online.rules) | 50 | | [Snort3](#snort3) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-snort3-online.rules) |
[link](https://curbengh.github.io/malware-filter/urlhaus-filter-snort3-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-snort3-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-snort3-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-snort3-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-snort3-online.rules) | 51 | | [Suricata](#suricata) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-suricata-online.rules) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-suricata-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-suricata-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-suricata-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-suricata-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-suricata-online.rules) | 52 | | [Suricata (SNI)](#suricata-sni) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-suricata-sni-online.rules) | [link](https://malware-filter.pages.dev/urlhaus-filter-suricata-sni-online.rules) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-suricata-sni-online.rules) | 53 | | [Splunk](#splunk) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-splunk-online.csv) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-splunk-online.csv) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-splunk-online.csv) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-splunk-online.csv) | [link](https://malware-filter.pages.dev/urlhaus-filter-splunk-online.csv) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-splunk-online.csv) | 54 | | [Internet Explorer](#tracking-protection-list-ie) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-online.tpl) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-online.tpl) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-online.tpl) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-online.tpl) | [link](https://malware-filter.pages.dev/urlhaus-filter-online.tpl) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-online.tpl) | 55 | 56 | ## Full version 57 | 58 | | Client | mirror 1 | mirror 2 | mirror 3 | mirror 4 | mirror 5 | mirror 6 | 59 | | --- | --- | --- | --- | --- | --- | --- | 60 | | [uBlock Origin](#url-based) ([*](#youtube-compatibility)) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter.txt) | 61 | | [AdGuard Home/Pi-hole](#domain-based-adguard-home) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-agh.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-agh.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-agh.txt) |
[link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-agh.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-agh.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-agh.txt) | 62 | | [AdGuard (browser extension)](#url-based-adguard) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-ag.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-ag.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-ag.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-ag.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-ag.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-ag.txt) | 63 | | [Vivaldi/Brave](#url-based-vivaldi) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-vivaldi.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-vivaldi.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-vivaldi.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-vivaldi.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-vivaldi.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-vivaldi.txt) | 64 | | [Hosts](#hosts-based) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-hosts.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-hosts.txt) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-hosts.txt) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-hosts.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-hosts.txt) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-hosts.txt) | 65 | | [Dnsmasq](#dnsmasq) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnsmasq.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-dnsmasq.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnsmasq.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnsmasq.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-dnsmasq.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-dnsmasq.conf) | 66 | | BIND [zone](#bind) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-bind.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-bind.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-bind.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-bind.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-bind.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-bind.conf) | 67 | | BIND [RPZ](#response-policy-zone) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-rpz.conf) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-rpz.conf) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-rpz.conf) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-rpz.conf) | [link](https://malware-filter.pages.dev/urlhaus-filter-rpz.conf) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter-rpz.conf) | 68 | | [dnscrypt-proxy](#dnscrypt-proxy) | [names.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt), 
[ips.txt](https://curbengh.github.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://curbengh.github.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://malware-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips.txt) | [names.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-names.txt), [ips.txt](https://urlhaus-filter.pages.dev/urlhaus-filter-dnscrypt-blocked-ips.txt) | 69 | | [blocky](#wildcard-asterisk) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-wildcard.txt) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter-wildcard.txt) | [link](https://curbengh.github.io/phishing-filter/urlhaus-filter-wildcard.txt) | [link](https://malware-filter.gitlab.io/phishing-filter/urlhaus-filter-wildcard.txt) | [link](https://malware-filter.pages.dev/urlhaus-filter-wildcard.txt) | [link](https://phishing-filter.pages.dev/urlhaus-filter-wildcard.txt) | 70 | | [Internet Explorer](#tracking-protection-list-ie) | [link](https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.tpl) | [link](https://curbengh.github.io/malware-filter/urlhaus-filter.tpl) | [link](https://curbengh.github.io/urlhaus-filter/urlhaus-filter.tpl) | [link](https://malware-filter.gitlab.io/urlhaus-filter/urlhaus-filter.tpl) | [link](https://malware-filter.pages.dev/urlhaus-filter.tpl) | [link](https://urlhaus-filter.pages.dev/urlhaus-filter.tpl) | 71 | 72 | For other programs, see [Compatibility](https://gitlab.com/malware-filter/malware-filter/wikis/compatibility) page in the wiki. 73 | 74 | Check out my other filters: 75 | 76 | - [phishing-filter](https://gitlab.com/malware-filter/phishing-filter) 77 | - [pup-filter](https://gitlab.com/malware-filter/pup-filter) 78 | - [tracking-filter](https://gitlab.com/malware-filter/tracking-filter) 79 | - [vn-badsite-filter](https://gitlab.com/malware-filter/vn-badsite-filter) 80 | 81 | ## URL-based 82 | 83 | Import the full version into uBO to block online and **offline** malicious websites. 84 | 85 | Lite version includes **online** links only. Enabled by default in uBO >=[1.28.2](https://github.com/gorhill/uBlock/releases/tag/1.28.2) 86 | 87 | **Note:** Lite version is 99% smaller by excluding offline urls. The status of urls is determined by the upstream Abuse.ch. However, the test is not 100% accurate and some malicious urls that are otherwise accessible may be missed. If bandwidth (9 MB/day) is not a constraint, I recommend the regular version; browser extensions may utilise [HTTP compression](https://developer.mozilla.org/en-US/docs/Web/HTTP/Compression) that can save 70% of bandwidth. 88 | 89 | Regular version contains >260K filters, do note that uBO can [easily handle](https://github.com/uBlockOrigin/uBlock-issues/issues/338#issuecomment-452843669) 500K filters. 90 | 91 | If you've installed the lite version but prefer to use the regular version, it's better to remove it beforehand. Having two versions at the same time won't cause any conflict issue, uBO can detect duplicate network filters and adjust accordingly, but it's a waste of your bandwidth. 
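To gauge the saving yourself (optional; exact numbers vary between builds), you can compare the transfer size of the full list with and without compression:

```sh
# Transfer size without compression
curl -so /dev/null -w 'identity: %{size_download} bytes\n' \
  "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt"

# Transfer size when the server is allowed to gzip the response.
# Without --compressed, curl leaves the body compressed, so
# size_download reports the compressed byte count.
curl -so /dev/null -w 'gzip: %{size_download} bytes\n' \
  -H 'Accept-Encoding: gzip' \
  "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt"
```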
**AdGuard Home** users should use [this blocklist](#domain-based-adguard-home).

### Youtube compatibility

The [AdGuard format](#url-based-adguard) may have fewer YouTube [issues](https://github.com/gorhill/uBlock/commit/402e2ebf57).

## URL-based (AdGuard)

Import the full version into AdGuard browser extensions to block online and **offline** malicious websites.

The lite version includes **online** links only.

## URL-based (Vivaldi)

For Vivaldi, the blocking level must be at least "Block Trackers". Import the full version into Vivaldi's **Tracker Blocking Sources** to block online and **offline** malicious websites.

For Brave, "Trackers & ads blocking" must be set to Aggressive. Import the list under Shields > Content filtering > Add custom filter lists.

The lite version includes **online** links only.

## Domain-based

This blocklist includes domains and IP addresses.

## Wildcard asterisk

This blocklist includes domains and IP addresses.

## Domain-based (AdGuard Home)

This AdGuard Home-compatible blocklist includes domains and IP addresses. It is also compatible with Pi-hole.

## Hosts-based

This blocklist includes domains only.

## Dnsmasq

This blocklist includes domains only.

Save the ruleset to "/usr/local/etc/dnsmasq/urlhaus-filter-dnsmasq.conf". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure dnsmasq to use the blocklist:

`printf "\nconf-file=/usr/local/etc/dnsmasq/urlhaus-filter-dnsmasq.conf\n" >> /etc/dnsmasq.conf`

## BIND

This blocklist includes domains only.

Save the ruleset to "/usr/local/etc/bind/urlhaus-filter-bind.conf". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure BIND to use the blocklist:

`printf '\ninclude "/usr/local/etc/bind/urlhaus-filter-bind.conf";\n' >> /etc/bind/named.conf`

Add this to "/etc/bind/null.zone.file" (skip this step if the file already exists):

```
$TTL 86400 ; one day
@ IN SOA ns.nullzone.loc. ns.nullzone.loc. (
        2017102203
        28800
        7200
        864000
        86400 )
  NS ns.nullzone.loc.
  A 0.0.0.0
@ IN A 0.0.0.0
* IN A 0.0.0.0
```

The zone file is derived from [here](https://github.com/tomzuu/blacklist-named/blob/master/null.zone.file).

## Response Policy Zone

This blocklist includes domains only.

## Unbound

This blocklist includes domains only.

Save the ruleset to "/usr/local/etc/unbound/urlhaus-filter-unbound.conf". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Unbound to use the blocklist:

`printf '\n include: "/usr/local/etc/unbound/urlhaus-filter-unbound.conf"\n' >> /etc/unbound/unbound.conf`

## dnscrypt-proxy

Save the rulesets to "/etc/dnscrypt-proxy/". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.
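As a minimal auto-update sketch (the wiki guide above is the canonical approach; paths, URLs and the service name below are assumptions to adjust for your system), a daily cron job could re-download both rulesets:

```sh
#!/bin/sh
# Illustrative only, e.g. saved as /etc/cron.daily/urlhaus-filter.
# Re-downloads both dnscrypt-proxy rulesets, then restarts the resolver.
set -eu

curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt" \
  -o /etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-names.txt
curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-ips.txt" \
  -o /etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-ips.txt

# Assumes a systemd service named "dnscrypt-proxy"; adjust for your init system.
systemctl restart dnscrypt-proxy
```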
Configure dnscrypt-proxy to use the blocklist:

```diff
[blocked_names]
+ blocked_names_file = '/etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-names.txt'

[blocked_ips]
+ blocked_ips_file = '/etc/dnscrypt-proxy/urlhaus-filter-dnscrypt-blocked-ips.txt'
```

## Snort2

This ruleset includes online URLs only. It is not compatible with [Snort3](#snort3).

Save the ruleset to "/etc/snort/rules/urlhaus-filter-snort2-online.rules". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Snort to use the ruleset:

`printf "\ninclude \$RULE_PATH/urlhaus-filter-snort2-online.rules\n" >> /etc/snort/snort.conf`

## Snort3

This ruleset includes online URLs only. It is not compatible with [Snort2](#snort2).

Save the ruleset to "/etc/snort/rules/urlhaus-filter-snort3-online.rules". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Snort to use the ruleset:

```diff
# /etc/snort/snort.lua
ips =
{
  variables = default_variables,
+ include = 'rules/urlhaus-filter-snort3-online.rules'
}
```

## Suricata

This ruleset includes online URLs only.

Save the ruleset to "/etc/suricata/rules/urlhaus-filter-suricata-online.rules". Refer to this [guide](https://gitlab.com/malware-filter/malware-filter/wikis/update-filter) for auto-update.

Configure Suricata to use the ruleset:

```diff
# /etc/suricata/suricata.yaml
rule-files:
  - local.rules
+ - urlhaus-filter-suricata-online.rules
```

### Suricata (SNI)

This ruleset includes online domains only. It enables Suricata to detect malicious HTTPS-enabled domains by inspecting the SNI in the [unencrypted Client Hello](https://en.wikipedia.org/wiki/Server_Name_Indication#Security_implications) message. Note, however, that support for Encrypted Client Hello, which defeats SNI inspection, is increasing.

## Splunk

A CSV file for Splunk [lookup](https://docs.splunk.com/Documentation/Splunk/latest/Knowledge/Aboutlookupsandfieldactions). This ruleset includes online URLs only.

Either upload the file via the GUI, or save it in `$SPLUNK_HOME/etc/system/lookups` or an app-specific `$SPLUNK_HOME/etc/apps/<app>/lookups`.

Alternatively, use the [malware-filter add-on](https://splunkbase.splunk.com/app/6970) to install this lookup and optionally auto-update it.

Columns:

| host | path | message | updated |
| --- | --- | --- | --- |
| example.com | | urlhaus-filter malicious website detected | 2022-12-21T12:34:56Z |
| example2.com | /some-path | urlhaus-filter malicious website detected | 2022-12-21T12:34:56Z |
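For a scripted install instead of the GUI upload (a sketch, assuming a default `$SPLUNK_HOME` layout), the CSV can be fetched directly into the lookups directory:

```sh
# Illustrative only: fetch the lookup CSV into Splunk's system lookups directory.
curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-splunk-online.csv" \
  -o "$SPLUNK_HOME/etc/system/lookups/urlhaus-filter-splunk-online.csv"
```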
## Tracking Protection List (IE)

This blocklist includes domains and IP addresses. It is supported in Internet Explorer 9+. [Install guide](https://superuser.com/a/550539)

## Third-party mirrors

- iosprivacy/urlhaus-filter-mirror

TBC

## Compressed version

All filters are also available in gzip-, brotli- and zstd-compressed versions.

- Gzip: https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.gz
- Brotli: https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.br
- Zstd: https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.zst
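For example, to download the gzip variant and decompress it on the fly:

```sh
# gzip acts as a stream filter when reading from stdin
curl -sSfL "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter.txt.gz" | gzip -d > urlhaus-filter.txt
```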
## Issues

This blocklist operates by blocking the **whole** website, instead of specific webpages; exceptions are made for popular websites (e.g. `https://docs.google.com/`), for which specific webpages are listed instead (e.g. `https://docs.google.com/malware-page`). Malicious webpages are only listed in the [URL-based](#url-based) filter; popular websites are excluded from the other filters.

_Popular_ websites are those listed in the [Umbrella Popularity List](https://s3-us-west-1.amazonaws.com/umbrella-static/index.html) (top 1M domains + subdomains), the [Tranco List](https://tranco-list.eu/) (top 1M domains), [Cloudflare Radar](https://developers.cloudflare.com/radar/investigate/domain-ranking-datasets/) (top 1M domains) and this [custom list](src/exclude.txt).

If you wish to exclude certain website(s) that you believe are sufficiently well-known, please create an [issue](https://gitlab.com/malware-filter/urlhaus-filter/issues) or [merge request](https://gitlab.com/malware-filter/urlhaus-filter/merge_requests). If the website is quite obscure but you still want to visit it, you can add a new line `||legitsite.com^$badfilter` to the "My filters" tab of uBO; use a subdomain if relevant, e.g. `||sub.legitsite.com^$badfilter`.

This filter **only** accepts new malware URLs from [URLhaus](https://urlhaus.abuse.ch/).

Please report new malware URLs to the upstream maintainer through https://urlhaus.abuse.ch/api/#submit.

## Cloning

Getting the last five revisions should be sufficient for a valid MR:

`git clone --depth 5 https://gitlab.com/malware-filter/urlhaus-filter.git`

## FAQ and Guides

See the [wiki](https://gitlab.com/malware-filter/malware-filter/-/wikis/home).

## CI Variables

Optional variables:

- `CLOUDFLARE_BUILD_HOOK`: Deploy to Cloudflare Pages.
- `NETLIFY_SITE_ID`: Deploy to Netlify.
- `CF_API`: Include the Cloudflare Radar [domains ranking](https://developers.cloudflare.com/radar/investigate/domain-ranking-datasets/). [Guide](https://developers.cloudflare.com/radar/get-started/first-request/) to creating an API token.

## Repository Mirrors

https://gitlab.com/curben/blog#repository-mirrors

## License

[Creative Commons Zero v1.0 Universal](LICENSE-CC0.md) and [MIT License](LICENSE)

[URLhaus](https://urlhaus.abuse.ch/): [CC0](https://creativecommons.org/publicdomain/zero/1.0/)

[Tranco List](https://tranco-list.eu/): [MIT License](https://choosealicense.com/licenses/mit/)

[Umbrella Popularity List](https://s3-us-west-1.amazonaws.com/umbrella-static/index.html): Available free of charge by Cisco Umbrella

[Cloudflare Radar](https://developers.cloudflare.com/radar/investigate/domain-ranking-datasets/): Available to free Cloudflare accounts

This repository is not endorsed by Abuse.ch.