├── .air.toml ├── .gitattributes ├── .github ├── FUNDING.yml ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── docker-pr.yml │ ├── docker.yml │ ├── docs-deploy.yml │ ├── docs-test.yml │ ├── go.yml │ ├── package-builds-stable.yml │ ├── package-builds-unstable.yml │ └── zizmor.yml ├── .gitignore ├── .ko.yaml ├── Brewfile ├── LICENSE ├── Makefile ├── README.md ├── VERSION ├── anubis.go ├── cmd ├── anubis │ ├── .gitignore │ └── main.go └── containerbuild │ ├── .gitignore │ └── main.go ├── data ├── apps │ └── gitea-rss-feeds.yaml ├── botPolicies.json ├── botPolicies.yaml ├── bots │ ├── ai-robots-txt.yaml │ ├── cloudflare-workers.yaml │ ├── headless-browsers.yaml │ └── us-ai-scraper.yaml ├── common │ ├── allow-private-addresses.yaml │ └── keep-internet-working.yaml ├── crawlers │ ├── bingbot.yaml │ ├── duckduckbot.yaml │ ├── googlebot.yaml │ ├── internet-archive.yaml │ ├── kagibot.yaml │ ├── marginalia.yaml │ ├── mojeekbot.yaml │ └── qwantbot.yaml └── embed.go ├── decaymap ├── decaymap.go └── decaymap_test.go ├── docs ├── .dockerignore ├── .gitignore ├── Dockerfile ├── README.md ├── docs │ ├── CHANGELOG.md │ ├── admin │ │ ├── _category_.json │ │ ├── algorithm-selection.mdx │ │ ├── caveats-gitea-forgejo.mdx │ │ ├── configuration │ │ │ ├── _category_.json │ │ │ ├── import.mdx │ │ │ └── open-graph.mdx │ │ ├── default-allow-behavior.mdx │ │ ├── environments │ │ │ ├── _category_.json │ │ │ ├── apache.mdx │ │ │ ├── docker-compose.mdx │ │ │ ├── kubernetes.mdx │ │ │ ├── nginx.mdx │ │ │ └── traefik.mdx │ │ ├── installation.mdx │ │ ├── native-install.mdx │ │ └── policies.mdx │ ├── design │ │ ├── _category_.json │ │ ├── how-anubis-works.mdx │ │ └── why-proof-of-work.mdx │ ├── developer │ │ ├── _category_.json │ │ ├── building-anubis.md │ │ ├── code-quality.md │ │ ├── local-dev.md │ │ └── signed-commits.md │ ├── funding.md │ ├── index.mdx │ └── user │ │ ├── _category_.json │ │ ├── known-broken-extensions.md │ │ ├── known-instances.md │ │ └── why-see-challenge.md 
├── docusaurus.config.ts ├── manifest │ ├── deployment.yaml │ ├── ingress.yaml │ ├── kustomization.yaml │ ├── onionservice.yaml │ └── service.yaml ├── package-lock.json ├── package.json ├── sidebars.ts ├── src │ ├── components │ │ ├── HomepageFeatures │ │ │ ├── index.tsx │ │ │ └── styles.module.css │ │ └── RandomKey │ │ │ └── index.tsx │ ├── css │ │ └── custom.css │ └── pages │ │ ├── index.module.css │ │ └── index.tsx ├── static │ ├── .nojekyll │ └── img │ │ ├── android-chrome-512x512.png │ │ ├── docusaurus-social-card.jpg │ │ ├── docusaurus.png │ │ ├── favicon.ico │ │ ├── favicon.webp │ │ ├── happy.webp │ │ ├── logo.svg │ │ ├── undraw_docusaurus_mountain.svg │ │ ├── undraw_docusaurus_react.svg │ │ └── undraw_docusaurus_tree.svg └── tsconfig.json ├── go.mod ├── go.sum ├── internal ├── dnsbl │ ├── dnsbl.go │ ├── dnsbl_test.go │ └── droneblresponse_string.go ├── hash.go ├── headers.go ├── ogtags │ ├── cache.go │ ├── cache_test.go │ ├── fetch.go │ ├── fetch_test.go │ ├── integration_test.go │ ├── ogtags.go │ ├── ogtags_test.go │ ├── parse.go │ └── parse_test.go ├── slog.go └── test │ ├── playwright_test.go │ └── var │ └── .gitignore ├── lib ├── anubis.go ├── anubis_test.go ├── http.go ├── policy │ ├── bot.go │ ├── checker.go │ ├── checker_test.go │ ├── checkresult.go │ ├── config │ │ ├── config.go │ │ ├── config_test.go │ │ └── testdata │ │ │ ├── bad │ │ │ ├── badregexes.json │ │ │ ├── badregexes.yaml │ │ │ ├── import_and_bot.json │ │ │ ├── import_and_bot.yaml │ │ │ ├── import_invalid_file.json │ │ │ ├── import_invalid_file.yaml │ │ │ ├── invalid.json │ │ │ ├── invalid.yaml │ │ │ ├── nobots.json │ │ │ └── nobots.yaml │ │ │ ├── good │ │ │ ├── allow_everyone.json │ │ │ ├── allow_everyone.yaml │ │ │ ├── block_cf_workers.json │ │ │ ├── block_cf_workers.yaml │ │ │ ├── challengemozilla.json │ │ │ ├── challengemozilla.yaml │ │ │ ├── everything_blocked.json │ │ │ ├── everything_blocked.yaml │ │ │ ├── import_filesystem.json │ │ │ ├── import_filesystem.yaml │ │ │ ├── 
import_keep_internet_working.json │ │ │ └── import_keep_internet_working.yaml │ │ │ ├── hack-test.json │ │ │ └── hack-test.yaml │ ├── policy.go │ ├── policy_test.go │ └── testdata │ │ ├── hack-test.json │ │ └── hack-test.yaml └── random.go ├── package-lock.json ├── package.json ├── run ├── anubis.freebsd ├── anubis@.service └── default.env ├── var └── .gitignore ├── web ├── build.sh ├── embed.go ├── index.go ├── index.templ ├── index_templ.go ├── js │ ├── bench.mjs │ ├── main.mjs │ ├── proof-of-work-slow.mjs │ ├── proof-of-work.mjs │ └── video.mjs └── static │ ├── img │ ├── happy.webp │ ├── pensive.webp │ └── reject.webp │ ├── js │ └── .gitignore │ ├── robots.txt │ └── testdata │ └── black.mp4 ├── xess ├── .gitignore ├── build.sh ├── postcss.config.js ├── static │ ├── geist.woff2 │ ├── iosevka-curly.woff2 │ ├── podkova.css │ └── podkova.woff2 ├── xess.css ├── xess.go ├── xess.templ └── xess_templ.go └── yeetfile.js /.air.toml: -------------------------------------------------------------------------------- 1 | root = "." 2 | tmp_dir = "var" 3 | 4 | [build] 5 | cmd = "go build -o ./var/main ./cmd/anubis" 6 | bin = "./var/main" 7 | args = ["--use-remote-address"] 8 | exclude_dir = ["var", "vendor", "docs", "node_modules"] 9 | 10 | [logger] 11 | time = true 12 | # to change flags at runtime, prepend with -- e.g. 
$ air -- --target http://localhost:3000 --difficulty 20 --use-remote-address -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | web/index_templ.go linguist-generated 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | patreon: cadey 2 | github: xe -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | Checklist: 8 | 9 | - [ ] Added a description of the changes to the `[Unreleased]` section of docs/docs/CHANGELOG.md 10 | - [ ] Added test cases to [the relevant parts of the codebase](https://anubis.techaro.lol/docs/developer/code-quality) 11 | - [ ] Ran integration tests `npm run test:integration` (unsupported on Windows, please use WSL) 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | 12 | - package-ecosystem: gomod 13 | directory: / 14 | schedule: 15 | interval: weekly 16 | groups: 17 | gomod: 18 | patterns: 19 | - "*" 20 | 21 | - package-ecosystem: npm 22 | directory: / 23 | schedule: 24 | interval: weekly 25 | groups: 26 | npm: 27 | patterns: 28 | - "*" 29 | -------------------------------------------------------------------------------- /.github/workflows/docker-pr.yml: -------------------------------------------------------------------------------- 1 | name: Docker image builds (pull requests) 2 | 3 | on: 4 | pull_request: 5 
| branches: [ "main" ] 6 | 7 | env: 8 | DOCKER_METADATA_SET_OUTPUT_ENV: "true" 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-24.04 16 | steps: 17 | - name: Checkout code 18 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | with: 20 | fetch-tags: true 21 | fetch-depth: 0 22 | persist-credentials: false 23 | 24 | - name: Set up Homebrew 25 | uses: Homebrew/actions/setup-homebrew@master 26 | 27 | - name: Setup Homebrew cellar cache 28 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 29 | with: 30 | path: | 31 | /home/linuxbrew/.linuxbrew/Cellar 32 | /home/linuxbrew/.linuxbrew/bin 33 | /home/linuxbrew/.linuxbrew/etc 34 | /home/linuxbrew/.linuxbrew/include 35 | /home/linuxbrew/.linuxbrew/lib 36 | /home/linuxbrew/.linuxbrew/opt 37 | /home/linuxbrew/.linuxbrew/sbin 38 | /home/linuxbrew/.linuxbrew/share 39 | /home/linuxbrew/.linuxbrew/var 40 | key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }} 41 | restore-keys: | 42 | ${{ runner.os }}-go-homebrew-cellar- 43 | 44 | - name: Install Brew dependencies 45 | run: | 46 | brew bundle 47 | 48 | - name: Docker meta 49 | id: meta 50 | uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 51 | with: 52 | images: ghcr.io/techarohq/anubis 53 | 54 | - name: Build and push 55 | id: build 56 | run: | 57 | npm ci 58 | npm run container 59 | env: 60 | PULL_REQUEST_ID: ${{ github.event.number }} 61 | DOCKER_REPO: ghcr.io/techarohq/anubis 62 | SLOG_LEVEL: debug 63 | 64 | - run: | 65 | echo "Test this with:" 66 | echo "docker pull ${DOCKER_IMAGE}" 67 | env: 68 | DOCKER_IMAGE: ${{ steps.build.outputs.docker_image }} 69 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker image builds 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ "main" 
] 7 | tags: [ "v*" ] 8 | 9 | env: 10 | DOCKER_METADATA_SET_OUTPUT_ENV: "true" 11 | 12 | permissions: 13 | contents: read 14 | packages: write 15 | attestations: write 16 | id-token: write 17 | pull-requests: write 18 | 19 | jobs: 20 | build: 21 | runs-on: ubuntu-24.04 22 | steps: 23 | - name: Checkout code 24 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 25 | with: 26 | fetch-tags: true 27 | fetch-depth: 0 28 | persist-credentials: false 29 | 30 | - name: Set up Homebrew 31 | uses: Homebrew/actions/setup-homebrew@master 32 | 33 | - name: Setup Homebrew cellar cache 34 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 35 | with: 36 | path: | 37 | /home/linuxbrew/.linuxbrew/Cellar 38 | /home/linuxbrew/.linuxbrew/bin 39 | /home/linuxbrew/.linuxbrew/etc 40 | /home/linuxbrew/.linuxbrew/include 41 | /home/linuxbrew/.linuxbrew/lib 42 | /home/linuxbrew/.linuxbrew/opt 43 | /home/linuxbrew/.linuxbrew/sbin 44 | /home/linuxbrew/.linuxbrew/share 45 | /home/linuxbrew/.linuxbrew/var 46 | key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }} 47 | restore-keys: | 48 | ${{ runner.os }}-go-homebrew-cellar- 49 | 50 | - name: Install Brew dependencies 51 | run: | 52 | brew bundle 53 | 54 | - name: Log into registry 55 | uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 56 | with: 57 | registry: ghcr.io 58 | username: techarohq 59 | password: ${{ secrets.GITHUB_TOKEN }} 60 | 61 | - name: Docker meta 62 | id: meta 63 | uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 64 | with: 65 | images: ghcr.io/techarohq/anubis 66 | 67 | - name: Build and push 68 | id: build 69 | run: | 70 | npm ci 71 | npm run container 72 | env: 73 | DOCKER_REPO: ghcr.io/techarohq/anubis 74 | SLOG_LEVEL: debug 75 | 76 | - name: Generate artifact attestation 77 | uses: actions/attest-build-provenance@c074443f1aee8d4aeeae555aebba3282517141b2 # v2.2.3 78 | with: 79 | subject-name: 
ghcr.io/techarohq/anubis 80 | subject-digest: ${{ steps.build.outputs.digest }} 81 | push-to-registry: true 82 | -------------------------------------------------------------------------------- /.github/workflows/docs-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Docs deploy 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [ "main" ] 7 | 8 | permissions: 9 | contents: read 10 | packages: write 11 | attestations: write 12 | id-token: write 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-24.04 17 | 18 | steps: 19 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 20 | with: 21 | persist-credentials: false 22 | 23 | - name: Set up Docker Buildx 24 | uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 25 | 26 | - name: Log into registry 27 | uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0 28 | with: 29 | registry: ghcr.io 30 | username: techarohq 31 | password: ${{ secrets.GITHUB_TOKEN }} 32 | 33 | - name: Docker meta 34 | id: meta 35 | uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 36 | with: 37 | images: ghcr.io/techarohq/anubis/docs 38 | 39 | - name: Build and push 40 | id: build 41 | uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0 42 | with: 43 | context: ./docs 44 | cache-to: type=gha 45 | cache-from: type=gha 46 | tags: ${{ steps.meta.outputs.tags }} 47 | labels: ${{ steps.meta.outputs.labels }} 48 | platforms: linux/amd64 49 | push: true 50 | 51 | - name: Apply k8s manifests to aeacus 52 | uses: actions-hub/kubectl@9270913c29699788b51bc04becd0ebdf048ffb49 # v1.32.3 53 | env: 54 | KUBE_CONFIG: ${{ secrets.AEACUS_KUBECONFIG }} 55 | with: 56 | args: apply -k docs/manifest 57 | 58 | - name: Apply k8s manifests to aeacus 59 | uses: actions-hub/kubectl@9270913c29699788b51bc04becd0ebdf048ffb49 # v1.32.3 60 | env: 61 | KUBE_CONFIG: ${{ 
secrets.AEACUS_KUBECONFIG }} 62 | with: 63 | args: rollout restart -n default deploy/anubis-docs 64 | -------------------------------------------------------------------------------- /.github/workflows/docs-test.yml: -------------------------------------------------------------------------------- 1 | name: Docs test build 2 | 3 | on: 4 | pull_request: 5 | branches: [ "main" ] 6 | 7 | permissions: 8 | contents: read 9 | actions: write 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-24.04 14 | 15 | steps: 16 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 17 | with: 18 | persist-credentials: false 19 | 20 | - name: Set up Docker Buildx 21 | uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 22 | 23 | - name: Docker meta 24 | id: meta 25 | uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0 26 | with: 27 | images: ghcr.io/techarohq/anubis/docs 28 | 29 | - name: Build and push 30 | id: build 31 | uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0 32 | with: 33 | context: ./docs 34 | cache-to: type=gha 35 | cache-from: type=gha 36 | tags: ${{ steps.meta.outputs.tags }} 37 | labels: ${{ steps.meta.outputs.labels }} 38 | platforms: linux/amd64 39 | push: false 40 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | actions: write 12 | 13 | jobs: 14 | go_tests: 15 | #runs-on: alrest-techarohq 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | with: 20 | persist-credentials: false 21 | 22 | - name: build essential 23 | run: | 24 | sudo apt-get update 25 | sudo apt-get install -y build-essential 26 | 27 
| - name: Set up Homebrew 28 | uses: Homebrew/actions/setup-homebrew@master 29 | 30 | - name: Setup Homebrew cellar cache 31 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 32 | with: 33 | path: | 34 | /home/linuxbrew/.linuxbrew/Cellar 35 | /home/linuxbrew/.linuxbrew/bin 36 | /home/linuxbrew/.linuxbrew/etc 37 | /home/linuxbrew/.linuxbrew/include 38 | /home/linuxbrew/.linuxbrew/lib 39 | /home/linuxbrew/.linuxbrew/opt 40 | /home/linuxbrew/.linuxbrew/sbin 41 | /home/linuxbrew/.linuxbrew/share 42 | /home/linuxbrew/.linuxbrew/var 43 | key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }} 44 | restore-keys: | 45 | ${{ runner.os }}-go-homebrew-cellar- 46 | 47 | - name: Install Brew dependencies 48 | run: | 49 | brew bundle 50 | 51 | - name: Setup Golang caches 52 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 53 | with: 54 | path: | 55 | ~/.cache/go-build 56 | ~/go/pkg/mod 57 | key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }} 58 | restore-keys: | 59 | ${{ runner.os }}-golang- 60 | 61 | - name: Cache playwright binaries 62 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 63 | id: playwright-cache 64 | with: 65 | path: | 66 | ~/.cache/ms-playwright 67 | key: ${{ runner.os }}-playwright-${{ hashFiles('**/go.sum') }} 68 | 69 | - name: install playwright browsers 70 | run: | 71 | npx --yes playwright@1.51.1 install --with-deps 72 | npx --yes playwright@1.51.1 run-server --port 9001 & 73 | 74 | - name: install node deps 75 | run: | 76 | npm ci 77 | npm run assets 78 | 79 | - name: Build 80 | run: npm run build 81 | 82 | - name: Test 83 | run: npm run test 84 | 85 | - uses: dominikh/staticcheck-action@fe1dd0c3658873b46f8c9bb3291096a617310ca6 # v1.3.1 86 | with: 87 | version: "latest" 88 | -------------------------------------------------------------------------------- /.github/workflows/package-builds-stable.yml: 
-------------------------------------------------------------------------------- 1 | name: Package builds (stable) 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: write 9 | actions: write 10 | 11 | jobs: 12 | package_builds: 13 | #runs-on: alrest-techarohq 14 | runs-on: ubuntu-24.04 15 | steps: 16 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 17 | with: 18 | persist-credentials: false 19 | fetch-tags: true 20 | fetch-depth: 0 21 | 22 | - name: build essential 23 | run: | 24 | sudo apt-get update 25 | sudo apt-get install -y build-essential 26 | 27 | - name: Set up Homebrew 28 | uses: Homebrew/actions/setup-homebrew@master 29 | 30 | - name: Setup Homebrew cellar cache 31 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 32 | with: 33 | path: | 34 | /home/linuxbrew/.linuxbrew/Cellar 35 | /home/linuxbrew/.linuxbrew/bin 36 | /home/linuxbrew/.linuxbrew/etc 37 | /home/linuxbrew/.linuxbrew/include 38 | /home/linuxbrew/.linuxbrew/lib 39 | /home/linuxbrew/.linuxbrew/opt 40 | /home/linuxbrew/.linuxbrew/sbin 41 | /home/linuxbrew/.linuxbrew/share 42 | /home/linuxbrew/.linuxbrew/var 43 | key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }} 44 | restore-keys: | 45 | ${{ runner.os }}-go-homebrew-cellar- 46 | 47 | - name: Install Brew dependencies 48 | run: | 49 | brew bundle 50 | 51 | - name: Setup Golang caches 52 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 53 | with: 54 | path: | 55 | ~/.cache/go-build 56 | ~/go/pkg/mod 57 | key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }} 58 | restore-keys: | 59 | ${{ runner.os }}-golang- 60 | 61 | - name: install node deps 62 | run: | 63 | npm ci 64 | 65 | - name: Build Packages 66 | run: | 67 | wget https://github.com/TecharoHQ/yeet/releases/download/v0.1.1/yeet_0.1.1_amd64.deb -O var/yeet.deb 68 | sudo apt -y install -f ./var/yeet.deb 69 | rm ./var/yeet.deb 70 | yeet 71 | 72 | - name: Upload released 
artifacts 73 | env: 74 | GITHUB_TOKEN: ${{ github.TOKEN }} 75 | RELEASE_VERSION: ${{github.event.release.tag_name}} 76 | shell: bash 77 | run: | 78 | RELEASE="${RELEASE_VERSION}" 79 | cd var 80 | for file in *; do 81 | gh release upload $RELEASE $file 82 | done 83 | -------------------------------------------------------------------------------- /.github/workflows/package-builds-unstable.yml: -------------------------------------------------------------------------------- 1 | name: Package builds (unstable) 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | permissions: 10 | contents: read 11 | actions: write 12 | 13 | jobs: 14 | package_builds: 15 | #runs-on: alrest-techarohq 16 | runs-on: ubuntu-24.04 17 | steps: 18 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 19 | with: 20 | persist-credentials: false 21 | fetch-tags: true 22 | fetch-depth: 0 23 | 24 | - name: build essential 25 | run: | 26 | sudo apt-get update 27 | sudo apt-get install -y build-essential 28 | 29 | - name: Set up Homebrew 30 | uses: Homebrew/actions/setup-homebrew@master 31 | 32 | - name: Setup Homebrew cellar cache 33 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 34 | with: 35 | path: | 36 | /home/linuxbrew/.linuxbrew/Cellar 37 | /home/linuxbrew/.linuxbrew/bin 38 | /home/linuxbrew/.linuxbrew/etc 39 | /home/linuxbrew/.linuxbrew/include 40 | /home/linuxbrew/.linuxbrew/lib 41 | /home/linuxbrew/.linuxbrew/opt 42 | /home/linuxbrew/.linuxbrew/sbin 43 | /home/linuxbrew/.linuxbrew/share 44 | /home/linuxbrew/.linuxbrew/var 45 | key: ${{ runner.os }}-go-homebrew-cellar-${{ hashFiles('go.sum') }} 46 | restore-keys: | 47 | ${{ runner.os }}-go-homebrew-cellar- 48 | 49 | - name: Install Brew dependencies 50 | run: | 51 | brew bundle 52 | 53 | - name: Setup Golang caches 54 | uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 55 | with: 56 | path: | 57 | ~/.cache/go-build 58 | 
~/go/pkg/mod 59 | key: ${{ runner.os }}-golang-${{ hashFiles('**/go.sum') }} 60 | restore-keys: | 61 | ${{ runner.os }}-golang- 62 | 63 | - name: install node deps 64 | run: | 65 | npm ci 66 | 67 | - name: Build Packages 68 | run: | 69 | wget https://github.com/TecharoHQ/yeet/releases/download/v0.1.1/yeet_0.1.1_amd64.deb -O var/yeet.deb 70 | sudo apt -y install -f ./var/yeet.deb 71 | rm ./var/yeet.deb 72 | yeet 73 | 74 | - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 75 | with: 76 | name: packages 77 | path: var/* 78 | -------------------------------------------------------------------------------- /.github/workflows/zizmor.yml: -------------------------------------------------------------------------------- 1 | name: zizmor 2 | 3 | on: 4 | push: 5 | paths: 6 | - '.github/workflows/*.ya?ml' 7 | pull_request: 8 | paths: 9 | - '.github/workflows/*.ya?ml' 10 | 11 | jobs: 12 | zizmor: 13 | name: zizmor latest via PyPI 14 | runs-on: ubuntu-24.04 15 | permissions: 16 | security-events: write 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 20 | with: 21 | persist-credentials: false 22 | 23 | - name: Install the latest version of uv 24 | uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86 # v5.4.2 25 | 26 | - name: Run zizmor 🌈 27 | run: uvx zizmor --format sarif . 
> results.sarif 28 | env: 29 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | 31 | - name: Upload SARIF file 32 | uses: github/codeql-action/upload-sarif@45775bd8235c68ba998cffa5171334d58593da47 # v3.28.15 33 | with: 34 | sarif_file: results.sarif 35 | category: zizmor 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.deb 3 | *.rpm 4 | 5 | # Additional package locks 6 | pnpm-lock.yaml 7 | yarn.lock 8 | 9 | # Go binaries and test artifacts 10 | main 11 | *.test 12 | 13 | node_modules 14 | 15 | # MacOS 16 | .DS_store 17 | 18 | # Intellij 19 | .idea 20 | 21 | # how does this get here 22 | doc/VERSION 23 | -------------------------------------------------------------------------------- /.ko.yaml: -------------------------------------------------------------------------------- 1 | defaultBaseImage: cgr.dev/chainguard/static 2 | defaultPlatforms: 3 | - linux/arm64 4 | - linux/amd64 5 | - linux/arm/v7 6 | 7 | builds: 8 | - id: anubis 9 | main: ./cmd/anubis 10 | ldflags: 11 | - -s -w 12 | - -extldflags "-static" 13 | - -X github.com/TecharoHQ/anubis.Version={{.Env.VERSION}} 14 | -------------------------------------------------------------------------------- /Brewfile: -------------------------------------------------------------------------------- 1 | # programming languages 2 | brew "go@1.24" 3 | brew "node" 4 | brew "ko" 5 | brew "esbuild" 6 | brew "zstd" 7 | brew "brotli" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Xe Iaso 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION= $(shell cat ./VERSION) 2 | GO?= go 3 | NPM?= npm 4 | 5 | .PHONY: build assets deps lint prebaked-build test 6 | 7 | all: build 8 | 9 | deps: 10 | $(NPM) ci 11 | $(GO) mod download 12 | 13 | assets: PATH:=$(PWD)/node_modules/.bin:$(PATH) 14 | assets: deps 15 | $(GO) generate ./... 16 | ./web/build.sh 17 | ./xess/build.sh 18 | 19 | build: assets 20 | $(GO) build -o ./var/anubis ./cmd/anubis 21 | @echo "Anubis is now built to ./var/anubis" 22 | 23 | lint: assets 24 | $(GO) vet ./... 25 | $(GO) tool staticcheck ./... 26 | 27 | prebaked-build: 28 | $(GO) build -o ./var/anubis -ldflags "-X 'github.com/TecharoHQ/anubis.Version=$(VERSION)'" ./cmd/anubis 29 | 30 | test: assets 31 | $(GO) test ./... 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Anubis 2 | 3 |
4 | A smiling chibi dark-skinned anthro jackal with brown hair and tall ears looking victorious with a thumbs-up 5 |
6 | 7 | ![enbyware](https://pride-badges.pony.workers.dev/static/v1?label=enbyware&labelColor=%23555&stripeWidth=8&stripeColors=FCF434%2CFFFFFF%2C9C59D1%2C2C2C2C) 8 | ![GitHub Issues or Pull Requests by label](https://img.shields.io/github/issues/TecharoHQ/anubis) 9 | ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/TecharoHQ/anubis) 10 | ![language count](https://img.shields.io/github/languages/count/TecharoHQ/anubis) 11 | ![repo size](https://img.shields.io/github/repo-size/TecharoHQ/anubis) 12 | 13 | Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots. 14 | 15 | Installing and using this will likely result in your website not being indexed by some search engines. This is considered a feature of Anubis, not a bug. 16 | 17 | This is a bit of a nuclear response, but AI scraper bots scraping so aggressively have forced my hand. I hate that I have to do this, but this is what we get for the modern Internet because bots don't conform to standards like robots.txt, even when they claim to. 18 | 19 | In most cases, you should not need this and can probably get by using Cloudflare to protect a given origin. However, for circumstances where you can't or won't use Cloudflare, Anubis is there for you. 20 | 21 | If you want to try this out, connect to [anubis.techaro.lol](https://anubis.techaro.lol). 22 | 23 | ## Support 24 | 25 | If you run into any issues running Anubis, please [open an issue](https://github.com/TecharoHQ/anubis/issues/new?template=Blank+issue). Please include all the information I would need to diagnose your issue. 26 | 27 | For live chat, please join the [Patreon](https://patreon.com/cadey) and ask in the Patron discord in the channel `#anubis`. 
28 | 29 | ## Star History 30 | 31 | [![Star History Chart](https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date)](https://www.star-history.com/#TecharoHQ/anubis&Date) 32 | 33 | ## Packaging Status 34 | 35 | [![Packaging status](https://repology.org/badge/vertical-allrepos/anubis-anti-crawler.svg?columns=3)](https://repology.org/project/anubis-anti-crawler/versions) 36 | 37 | ## Contributors 38 | 39 | 40 | 41 | 42 | 43 | Made with [contrib.rocks](https://contrib.rocks). 44 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.16.0 2 | -------------------------------------------------------------------------------- /anubis.go: -------------------------------------------------------------------------------- 1 | // Package Anubis contains the version number of Anubis. 2 | package anubis 3 | 4 | // Version is the current version of Anubis. 5 | // 6 | // This variable is set at build time using the -X linker flag. If not set, 7 | // it defaults to "devel". 8 | var Version = "devel" 9 | 10 | // CookieName is the name of the cookie that Anubis uses in order to validate 11 | // access. 12 | const CookieName = "within.website-x-cmd-anubis-auth" 13 | 14 | // StaticPath is the location where all static Anubis assets are located. 15 | const StaticPath = "/.within.website/x/cmd/anubis/" 16 | 17 | // DefaultDifficulty is the default "difficulty" (number of leading zeroes) 18 | // that must be met by the client in order to pass the challenge. 
19 | const DefaultDifficulty = 4 20 | -------------------------------------------------------------------------------- /cmd/anubis/.gitignore: -------------------------------------------------------------------------------- 1 | *.rpm 2 | anubis 3 | -------------------------------------------------------------------------------- /cmd/containerbuild/.gitignore: -------------------------------------------------------------------------------- 1 | images -------------------------------------------------------------------------------- /cmd/containerbuild/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "log" 7 | "log/slog" 8 | "os" 9 | "os/exec" 10 | "path/filepath" 11 | "strings" 12 | 13 | "github.com/TecharoHQ/anubis/internal" 14 | "github.com/facebookgo/flagenv" 15 | ) 16 | 17 | var ( 18 | dockerAnnotations = flag.String("docker-annotations", os.Getenv("DOCKER_METADATA_OUTPUT_ANNOTATIONS"), "Docker image annotations") 19 | dockerLabels = flag.String("docker-labels", os.Getenv("DOCKER_METADATA_OUTPUT_LABELS"), "Docker image labels") 20 | dockerRepo = flag.String("docker-repo", "registry.int.xeserv.us/techaro/anubis", "Docker image repository for Anubis") 21 | dockerTags = flag.String("docker-tags", os.Getenv("DOCKER_METADATA_OUTPUT_TAGS"), "newline separated docker tags including the registry name") 22 | githubEventName = flag.String("github-event-name", "", "GitHub event name") 23 | pullRequestID = flag.Int("pull-request-id", -1, "GitHub pull request ID") 24 | slogLevel = flag.String("slog-level", "INFO", "logging level (see https://pkg.go.dev/log/slog#hdr-Levels)") 25 | ) 26 | 27 | func main() { 28 | flagenv.Parse() 29 | flag.Parse() 30 | 31 | internal.InitSlog(*slogLevel) 32 | 33 | koDockerRepo := strings.TrimSuffix(*dockerRepo, "/"+filepath.Base(*dockerRepo)) 34 | 35 | if *githubEventName == "pull_request" && *pullRequestID != -1 { 36 | *dockerRepo = 
fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis", *pullRequestID)
		*dockerTags = fmt.Sprintf("ttl.sh/techaro/pr-%d/anubis:24h", *pullRequestID)
		koDockerRepo = fmt.Sprintf("ttl.sh/techaro/pr-%d", *pullRequestID)

		slog.Info(
			"Building image for pull request",
			"docker-repo", *dockerRepo,
			"docker-tags", *dockerTags,
			"github-event-name", *githubEventName,
			"pull-request-id", *pullRequestID,
		)
	}

	// Expose the first tag as the canonical image name for later workflow steps.
	setOutput("docker_image", strings.SplitN(*dockerTags, "\n", 2)[0])

	version, err := run("git describe --tags --always --dirty")
	if err != nil {
		log.Fatal(err)
	}

	// The commit timestamp feeds SOURCE_DATE_EPOCH so ko builds are reproducible.
	commitTimestamp, err := run("git log -1 --format='%ct'")
	if err != nil {
		log.Fatal(err)
	}

	slog.Debug(
		"ko env",
		"KO_DOCKER_REPO", koDockerRepo,
		"SOURCE_DATE_EPOCH", commitTimestamp,
		"VERSION", version,
	)

	os.Setenv("KO_DOCKER_REPO", koDockerRepo)
	os.Setenv("SOURCE_DATE_EPOCH", commitTimestamp)
	os.Setenv("VERSION", version)

	setOutput("version", version)

	if *dockerTags == "" {
		log.Fatal("Must set --docker-tags or DOCKER_METADATA_OUTPUT_TAGS")
	}

	images, err := parseImageList(*dockerTags)
	if err != nil {
		log.Fatalf("can't parse images: %v", err)
	}

	// Sanity-check that every tag points at the repository we expect to push to.
	for _, img := range images {
		if img.repository != *dockerRepo {
			slog.Error(
				"Something weird is going on. Wanted docker repo differs from contents of --docker-tags. Did a flag get set incorrectly?",
				"wanted", *dockerRepo,
				"got", img.repository,
				"docker-tags", *dockerTags,
			)
			os.Exit(2)
		}
	}

	var tags []string
	for _, img := range images {
		tags = append(tags, img.tag)
	}

	output, err := run(fmt.Sprintf("ko build --platform=all --base-import-paths --tags=%q --image-user=1000 --image-annotation=%q --image-label=%q ./cmd/anubis | tail -n1", strings.Join(tags, ","), *dockerAnnotations, *dockerLabels))
	if err != nil {
		log.Fatalf("can't run ko build, check stderr: %v", err)
	}

	// ko prints the fully qualified image reference (repo@sha256:...). Guard
	// against unexpected output instead of panicking with an index error.
	sp := strings.SplitN(output, "@", 2)
	if len(sp) != 2 {
		log.Fatalf("unexpected ko build output %q: no image digest found", output)
	}

	setOutput("digest", sp[1])
}

// image is one parsed entry from a newline-separated Docker tag list.
type image struct {
	repository string // e.g. reg.xeiaso.net/techaro/anubis
	tag        string // e.g. latest
}

// parseImageList splits a newline-separated list of fully qualified image
// references (repository:tag) into repository/tag pairs. Blank lines are
// skipped. It returns an error if the list is empty or if an entry contains
// no ":" separator (previously this would panic with a slice bounds error).
func parseImageList(imageList string) ([]image, error) {
	images := strings.Split(imageList, "\n")
	var result []image
	for _, img := range images {
		if img == "" {
			continue
		}

		// reg.xeiaso.net/techaro/anubis:latest
		// repository: reg.xeiaso.net/techaro/anubis
		// tag: latest
		index := strings.LastIndex(img, ":")
		if index == -1 {
			return nil, fmt.Errorf("image reference %q has no tag", img)
		}
		result = append(result, image{
			repository: img[:index],
			tag:        img[index+1:],
		})
	}

	if len(result) == 0 {
		return nil, fmt.Errorf("no images provided, bad flags")
	}

	return result, nil
}

// run executes a command and returns the trimmed output.
141 | func run(command string) (string, error) { 142 | bin, err := exec.LookPath("sh") 143 | if err != nil { 144 | return "", err 145 | } 146 | slog.Debug("running command", "command", command) 147 | cmd := exec.Command(bin, "-c", command) 148 | cmd.Stderr = os.Stderr 149 | out, err := cmd.Output() 150 | if err != nil { 151 | return "", err 152 | } 153 | return strings.TrimSpace(string(out)), nil 154 | } 155 | 156 | func setOutput(key, val string) { 157 | fmt.Printf("::set-output name=%s::%s\n", key, val) 158 | } 159 | -------------------------------------------------------------------------------- /data/apps/gitea-rss-feeds.yaml: -------------------------------------------------------------------------------- 1 | # By Aibrew: https://github.com/TecharoHQ/anubis/discussions/261#discussioncomment-12821065 2 | - name: gitea-feed-atom 3 | action: ALLOW 4 | path_regex: ^/[.A-Za-z0-9_-]{1,256}?[./A-Za-z0-9_-]*\.atom$ 5 | - name: gitea-feed-rss 6 | action: ALLOW 7 | path_regex: ^/[.A-Za-z0-9_-]{1,256}?[./A-Za-z0-9_-]*\.rss$ -------------------------------------------------------------------------------- /data/botPolicies.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "import": "(data)/bots/ai-robots-txt.yaml" 5 | }, 6 | { 7 | "import": "(data)/bots/cloudflare-workers.yaml" 8 | }, 9 | { 10 | "import": "(data)/bots/headless-browsers.yaml" 11 | }, 12 | { 13 | "import": "(data)/bots/us-ai-scraper.yaml" 14 | }, 15 | { 16 | "import": "(data)/crawlers/googlebot.yaml" 17 | }, 18 | { 19 | "import": "(data)/crawlers/bingbot.yaml" 20 | }, 21 | { 22 | "import": "(data)/crawlers/duckduckbot.yaml" 23 | }, 24 | { 25 | "import": "(data)/crawlers/qwantbot.yaml" 26 | }, 27 | { 28 | "import": "(data)/crawlers/internet-archive.yaml" 29 | }, 30 | { 31 | "import": "(data)/crawlers/kagibot.yaml" 32 | }, 33 | { 34 | "import": "(data)/crawlers/marginalia.yaml" 35 | }, 36 | { 37 | "import": "(data)/crawlers/mojeekbot.yaml" 38 | 
}, 39 | { 40 | "import": "(data)/common/keep-internet-working.yaml" 41 | }, 42 | { 43 | "name": "generic-browser", 44 | "user_agent_regex": "Mozilla|Opera", 45 | "action": "CHALLENGE" 46 | } 47 | ], 48 | "dnsbl": false 49 | } -------------------------------------------------------------------------------- /data/botPolicies.yaml: -------------------------------------------------------------------------------- 1 | ## Anubis has the ability to let you import snippets of configuration into the main 2 | ## configuration file. This allows you to break up your config into smaller parts 3 | ## that get logically assembled into one big file. 4 | ## 5 | ## Of note, a bot rule can either have inline bot configuration or import a 6 | ## bot config snippet. You cannot do both in a single bot rule. 7 | ## 8 | ## Import paths can either be prefixed with (data) to import from the common/shared 9 | ## rules in the data folder in the Anubis source tree or will point to absolute/relative 10 | ## paths in your filesystem. If you don't have access to the Anubis source tree, check 11 | ## /usr/share/docs/anubis/data or in the tarball you extracted Anubis from.
12 | 13 | bots: 14 | # Pathological bots to deny 15 | - # This correlates to data/bots/ai-robots-txt.yaml in the source tree 16 | import: (data)/bots/ai-robots-txt.yaml 17 | - import: (data)/bots/cloudflare-workers.yaml 18 | - import: (data)/bots/headless-browsers.yaml 19 | - import: (data)/bots/us-ai-scraper.yaml 20 | 21 | # Search engines to allow 22 | - import: (data)/crawlers/googlebot.yaml 23 | - import: (data)/crawlers/bingbot.yaml 24 | - import: (data)/crawlers/duckduckbot.yaml 25 | - import: (data)/crawlers/qwantbot.yaml 26 | - import: (data)/crawlers/internet-archive.yaml 27 | - import: (data)/crawlers/kagibot.yaml 28 | - import: (data)/crawlers/marginalia.yaml 29 | - import: (data)/crawlers/mojeekbot.yaml 30 | 31 | # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt) 32 | - import: (data)/common/keep-internet-working.yaml 33 | 34 | # # Punish any bot with "bot" in the user-agent string 35 | # # This is known to have a high false-positive rate, use at your own risk 36 | # - name: generic-bot-catchall 37 | # user_agent_regex: (?i:bot|crawler) 38 | # action: CHALLENGE 39 | # challenge: 40 | # difficulty: 16 # impossible 41 | # report_as: 4 # lie to the operator 42 | # algorithm: slow # intentionally waste CPU cycles and time 43 | 44 | # Generic catchall rule 45 | - name: generic-browser 46 | user_agent_regex: > 47 | Mozilla|Opera 48 | action: CHALLENGE 49 | 50 | dnsbl: false 51 | -------------------------------------------------------------------------------- /data/bots/ai-robots-txt.yaml: -------------------------------------------------------------------------------- 1 | - name: "ai-robots-txt" 2 | user_agent_regex: > 3 | AI2Bot|Ai2Bot-Dolma|Amazonbot|anthropic-ai|Applebot|Applebot-Extended|Brightbot 
1.0|Bytespider|CCBot|ChatGPT-User|Claude-Web|ClaudeBot|cohere-ai|cohere-training-data-crawler|Crawlspace|Diffbot|DuckAssistBot|FacebookBot|FriendlyCrawler|Google-Extended|GoogleOther|GoogleOther-Image|GoogleOther-Video|GPTBot|iaskspider/2.0|ICC-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo Bot|Meta-ExternalAgent|Meta-ExternalFetcher|OAI-SearchBot|omgili|omgilibot|PanguBot|Perplexity-User|PerplexityBot|PetalBot|Scrapy|SemrushBot-OCOB|SemrushBot-SWA|Sidetrade indexer bot|Timpibot|VelenPublicWebCrawler|Webzio-Extended|YouBot 4 | action: DENY -------------------------------------------------------------------------------- /data/bots/cloudflare-workers.yaml: -------------------------------------------------------------------------------- 1 | - name: cloudflare-workers 2 | headers_regex: 3 | CF-Worker: .* 4 | action: DENY -------------------------------------------------------------------------------- /data/bots/headless-browsers.yaml: -------------------------------------------------------------------------------- 1 | - name: lightpanda 2 | user_agent_regex: ^LightPanda/.*$ 3 | action: DENY 4 | - name: headless-chrome 5 | user_agent_regex: HeadlessChrome 6 | action: DENY 7 | - name: headless-chromium 8 | user_agent_regex: HeadlessChromium 9 | action: DENY -------------------------------------------------------------------------------- /data/bots/us-ai-scraper.yaml: -------------------------------------------------------------------------------- 1 | - name: us-artificial-intelligence-scraper 2 | user_agent_regex: \+https\://github\.com/US-Artificial-Intelligence/scraper 3 | action: DENY -------------------------------------------------------------------------------- /data/common/allow-private-addresses.yaml: -------------------------------------------------------------------------------- 1 | - name: ipv4-rfc-1918 2 | action: ALLOW 3 | remote_addresses: 4 | - 10.0.0.0/8 5 | - 172.16.0.0/12 6 | - 192.168.0.0/16 7 | - 100.64.0.0/10 8 | - name: ipv6-ula 9 | 
action: ALLOW 10 | remote_addresses: 11 | - fc00::/7 12 | - name: ipv6-link-local 13 | action: ALLOW 14 | remote_addresses: 15 | - fe80::/10 -------------------------------------------------------------------------------- /data/common/keep-internet-working.yaml: -------------------------------------------------------------------------------- 1 | # Common "keeping the internet working" routes 2 | - name: well-known 3 | path_regex: ^/.well-known/.*$ 4 | action: ALLOW 5 | - name: favicon 6 | path_regex: ^/favicon.ico$ 7 | action: ALLOW 8 | - name: robots-txt 9 | path_regex: ^/robots.txt$ 10 | action: ALLOW -------------------------------------------------------------------------------- /data/crawlers/bingbot.yaml: -------------------------------------------------------------------------------- 1 | - name: bingbot 2 | user_agent_regex: \+http\://www\.bing\.com/bingbot\.htm 3 | action: ALLOW 4 | # https://www.bing.com/toolbox/bingbot.json 5 | remote_addresses: [ 6 | "157.55.39.0/24", 7 | "207.46.13.0/24", 8 | "40.77.167.0/24", 9 | "13.66.139.0/24", 10 | "13.66.144.0/24", 11 | "52.167.144.0/24", 12 | "13.67.10.16/28", 13 | "13.69.66.240/28", 14 | "13.71.172.224/28", 15 | "139.217.52.0/28", 16 | "191.233.204.224/28", 17 | "20.36.108.32/28", 18 | "20.43.120.16/28", 19 | "40.79.131.208/28", 20 | "40.79.186.176/28", 21 | "52.231.148.0/28", 22 | "20.79.107.240/28", 23 | "51.105.67.0/28", 24 | "20.125.163.80/28", 25 | "40.77.188.0/22", 26 | "65.55.210.0/24", 27 | "199.30.24.0/23", 28 | "40.77.202.0/24", 29 | "40.77.139.0/25", 30 | "20.74.197.0/28", 31 | "20.15.133.160/27", 32 | "40.77.177.0/24", 33 | "40.77.178.0/23" 34 | ] 35 | -------------------------------------------------------------------------------- /data/crawlers/internet-archive.yaml: -------------------------------------------------------------------------------- 1 | - name: internet-archive 2 | action: ALLOW 3 | # https://ipinfo.io/AS7941 4 | remote_addresses: [ 5 | "207.241.224.0/20", 6 | "208.70.24.0/21", 7 | 
"2620:0:9c0::/48" 8 | ] -------------------------------------------------------------------------------- /data/crawlers/kagibot.yaml: -------------------------------------------------------------------------------- 1 | - name: kagibot 2 | user_agent_regex: \+https\://kagi\.com/bot 3 | action: ALLOW 4 | # https://kagi.com/bot 5 | remote_addresses: [ 6 | "216.18.205.234/32", 7 | "35.212.27.76/32", 8 | "104.254.65.50/32", 9 | "209.151.156.194/32" 10 | ] 11 | -------------------------------------------------------------------------------- /data/crawlers/marginalia.yaml: -------------------------------------------------------------------------------- 1 | - name: marginalia 2 | user_agent_regex: search\.marginalia\.nu 3 | action: ALLOW 4 | # Received directly over email 5 | remote_addresses: [ 6 | "193.183.0.162/31", 7 | "193.183.0.164/30", 8 | "193.183.0.168/30", 9 | "193.183.0.172/31", 10 | "193.183.0.174/32" 11 | ] -------------------------------------------------------------------------------- /data/crawlers/mojeekbot.yaml: -------------------------------------------------------------------------------- 1 | - name: mojeekbot 2 | user_agent_regex: http\://www\.mojeek\.com/bot\.html 3 | action: ALLOW 4 | # https://www.mojeek.com/bot.html 5 | remote_addresses: [ "5.102.173.71/32" ] -------------------------------------------------------------------------------- /data/crawlers/qwantbot.yaml: -------------------------------------------------------------------------------- 1 | - name: qwantbot 2 | user_agent_regex: \+https\://help\.qwant\.com/bot/ 3 | action: ALLOW 4 | # https://help.qwant.com/wp-content/uploads/sites/2/2025/01/qwantbot.json 5 | remote_addresses: [ "91.242.162.0/24" ] 6 | -------------------------------------------------------------------------------- /data/embed.go: -------------------------------------------------------------------------------- 1 | package data 2 | 3 | import "embed" 4 | 5 | var ( 6 | //go:embed botPolicies.yaml botPolicies.json apps 
bots common crawlers 7 | BotPolicies embed.FS 8 | ) 9 | -------------------------------------------------------------------------------- /decaymap/decaymap.go: -------------------------------------------------------------------------------- 1 | package decaymap 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | func Zilch[T any]() T { 9 | var zero T 10 | return zero 11 | } 12 | 13 | // Impl is a lazy key->value map. It's a wrapper around a map and a mutex. If values exceed their time-to-live, they are pruned at Get time. 14 | type Impl[K comparable, V any] struct { 15 | data map[K]decayMapEntry[V] 16 | lock sync.RWMutex 17 | } 18 | 19 | type decayMapEntry[V any] struct { 20 | Value V 21 | expiry time.Time 22 | } 23 | 24 | // New creates a new DecayMap of key type K and value type V. 25 | // 26 | // Key types must be comparable to work with maps. 27 | func New[K comparable, V any]() *Impl[K, V] { 28 | return &Impl[K, V]{ 29 | data: make(map[K]decayMapEntry[V]), 30 | } 31 | } 32 | 33 | // expire forcibly expires a key by setting its time-to-live one second in the past. 34 | func (m *Impl[K, V]) expire(key K) bool { 35 | m.lock.RLock() 36 | val, ok := m.data[key] 37 | m.lock.RUnlock() 38 | 39 | if !ok { 40 | return false 41 | } 42 | 43 | m.lock.Lock() 44 | val.expiry = time.Now().Add(-1 * time.Second) 45 | m.data[key] = val 46 | m.lock.Unlock() 47 | 48 | return true 49 | } 50 | 51 | // Get gets a value from the DecayMap by key. 52 | // 53 | // If a value has expired, forcibly delete it if it was not updated. 54 | func (m *Impl[K, V]) Get(key K) (V, bool) { 55 | m.lock.RLock() 56 | value, ok := m.data[key] 57 | m.lock.RUnlock() 58 | 59 | if !ok { 60 | return Zilch[V](), false 61 | } 62 | 63 | if time.Now().After(value.expiry) { 64 | m.lock.Lock() 65 | // Since previously reading m.data[key], the value may have been updated. 66 | // Delete the entry only if the expiry time is still the same. 
67 | if m.data[key].expiry.Equal(value.expiry) { 68 | delete(m.data, key) 69 | } 70 | m.lock.Unlock() 71 | 72 | return Zilch[V](), false 73 | } 74 | 75 | return value.Value, true 76 | } 77 | 78 | // Set sets a key value pair in the map. 79 | func (m *Impl[K, V]) Set(key K, value V, ttl time.Duration) { 80 | m.lock.Lock() 81 | defer m.lock.Unlock() 82 | 83 | m.data[key] = decayMapEntry[V]{ 84 | Value: value, 85 | expiry: time.Now().Add(ttl), 86 | } 87 | } 88 | 89 | // Cleanup removes all expired entries from the DecayMap. 90 | func (m *Impl[K, V]) Cleanup() { 91 | m.lock.Lock() 92 | defer m.lock.Unlock() 93 | 94 | now := time.Now() 95 | for key, entry := range m.data { 96 | if now.After(entry.expiry) { 97 | delete(m.data, key) 98 | } 99 | } 100 | } 101 | 102 | // Len returns the number of entries in the DecayMap. 103 | func (m *Impl[K, V]) Len() int { 104 | m.lock.RLock() 105 | defer m.lock.RUnlock() 106 | return len(m.data) 107 | } 108 | -------------------------------------------------------------------------------- /decaymap/decaymap_test.go: -------------------------------------------------------------------------------- 1 | package decaymap 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestImpl(t *testing.T) { 9 | dm := New[string, string]() 10 | 11 | dm.Set("test", "hi", 5*time.Minute) 12 | 13 | val, ok := dm.Get("test") 14 | if !ok { 15 | t.Error("somehow the test key was not set") 16 | } 17 | 18 | if val != "hi" { 19 | t.Errorf("wanted value %q, got: %q", "hi", val) 20 | } 21 | 22 | ok = dm.expire("test") 23 | if !ok { 24 | t.Error("somehow could not force-expire the test key") 25 | } 26 | 27 | _, ok = dm.Get("test") 28 | if ok { 29 | t.Error("got value even though it was supposed to be expired") 30 | } 31 | } 32 | 33 | func TestCleanup(t *testing.T) { 34 | dm := New[string, string]() 35 | 36 | dm.Set("test1", "hi1", 1*time.Second) 37 | dm.Set("test2", "hi2", 2*time.Second) 38 | dm.Set("test3", "hi3", 3*time.Second) 39 | 40 | 
dm.expire("test1") // Force expire test1 41 | dm.expire("test2") // Force expire test2 42 | 43 | dm.Cleanup() 44 | 45 | finalLen := dm.Len() // Get the length after cleanup 46 | 47 | if finalLen != 1 { // "test3" should be the only one left 48 | t.Errorf("Cleanup failed to remove expired entries. Expected length 1, got %d", finalLen) 49 | } 50 | 51 | if _, ok := dm.Get("test1"); ok { // Verify Get still behaves correctly after Cleanup 52 | t.Error("test1 should not be found after cleanup") 53 | } 54 | if _, ok := dm.Get("test2"); ok { 55 | t.Error("test2 should not be found after cleanup") 56 | } 57 | if val, ok := dm.Get("test3"); !ok || val != "hi3" { 58 | t.Error("test3 should still be found after cleanup") 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /docs/.dockerignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | 22 | # Kubernetes manifests 23 | /manifest -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | /node_modules 3 | 4 | # Production 5 | /build 6 | 7 | # Generated files 8 | .docusaurus 9 | .cache-loader 10 | 11 | # Misc 12 | .DS_Store 13 | .env.local 14 | .env.development.local 15 | .env.test.local 16 | .env.production.local 17 | 18 | npm-debug.log* 19 | yarn-debug.log* 20 | yarn-error.log* 21 | -------------------------------------------------------------------------------- /docs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
docker.io/library/node AS build 2 | 3 | WORKDIR /app 4 | COPY . . 5 | 6 | RUN npm ci && npm run build 7 | 8 | FROM docker.io/library/nginx:alpine 9 | COPY --from=build /app/build /usr/share/nginx/html 10 | LABEL org.opencontainers.image.source="https://github.com/TecharoHQ/anubis" -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Installation 6 | 7 | ``` 8 | $ yarn 9 | ``` 10 | 11 | ### Local Development 12 | 13 | ``` 14 | $ yarn start 15 | ``` 16 | 17 | This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server. 18 | 19 | ### Build 20 | 21 | ``` 22 | $ yarn build 23 | ``` 24 | 25 | This command generates static content into the `build` directory and can be served using any static contents hosting service. 26 | 27 | ### Deployment 28 | 29 | Using SSH: 30 | 31 | ``` 32 | $ USE_SSH=true yarn deploy 33 | ``` 34 | 35 | Not using SSH: 36 | 37 | ``` 38 | $ GIT_USER= yarn deploy 39 | ``` 40 | 41 | If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch. 42 | -------------------------------------------------------------------------------- /docs/docs/admin/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Administrative guides", 3 | "position": 40, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "Tradeoffs and considerations you may want to keep in mind when using Anubis." 
7 | } 8 | } -------------------------------------------------------------------------------- /docs/docs/admin/algorithm-selection.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Proof-of-Work Algorithm Selection 3 | --- 4 | 5 | Anubis offers two proof-of-work algorithms: 6 | 7 | - `"fast"`: highly optimized JavaScript that will run as fast as your computer lets it 8 | - `"slow"`: intentionally slow JavaScript that will waste time and memory 9 | 10 | The fast algorithm is used by default to limit impacts on users' computers. Administrators may configure individual bot policy rules to use the slow algorithm in order to make known malicious clients waitloop and do nothing useful. 11 | 12 | Generally, you should use the fast algorithm unless you have a good reason not to. 13 | -------------------------------------------------------------------------------- /docs/docs/admin/caveats-gitea-forgejo.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: When using Caddy with Gitea/Forgejo 3 | --- 4 | 5 | Gitea/Forgejo relies on the reverse proxy setting the `X-Real-Ip` header. Caddy does not do this out of the gate. Modify your Caddyfile like this: 6 | 7 | ```python 8 | ellenjoe.int.within.lgbt { 9 | # ... 10 | # diff-remove 11 | reverse_proxy http://localhost:3000 12 | # diff-add 13 | reverse_proxy http://localhost:3000 { 14 | # diff-add 15 | header_up X-Real-Ip {remote_host} 16 | # diff-add 17 | } 18 | # ... 19 | } 20 | ``` 21 | 22 | Ensure that Gitea/Forgejo have `[security].REVERSE_PROXY_TRUSTED_PROXIES` set to the IP ranges that Anubis will appear from. 
Typically this is sufficient: 23 | 24 | ```ini 25 | [security] 26 | REVERSE_PROXY_TRUSTED_PROXIES = 127.0.0.0/8,::1/128 27 | ``` 28 | 29 | However if you are running Anubis in a separate Pod/Deployment in Kubernetes, you may have to adjust this to the IP range of the Pod space in your Container Networking Interface plugin: 30 | 31 | ```ini 32 | [security] 33 | REVERSE_PROXY_TRUSTED_PROXIES = 10.192.0.0/12 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/docs/admin/configuration/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Configuration", 3 | "position": 10, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "Detailed information about configuring parts of Anubis." 7 | } 8 | } -------------------------------------------------------------------------------- /docs/docs/admin/configuration/import.mdx: -------------------------------------------------------------------------------- 1 | # Importing configuration rules 2 | 3 | import Tabs from "@theme/Tabs"; 4 | import TabItem from "@theme/TabItem"; 5 | 6 | Anubis has the ability to let you import snippets of configuration into the main configuration file. This allows you to break up your config into smaller parts that get logically assembled into one big file. 7 | 8 | EG: 9 | 10 | 11 | 12 | 13 | ```json 14 | { 15 | "bots": [ 16 | { 17 | "import": "(data)/bots/ai-robots-txt.yaml" 18 | }, 19 | { 20 | "import": "(data)/bots/cloudflare-workers.yaml" 21 | } 22 | ] 23 | } 24 | ``` 25 | 26 | 27 | 28 | 29 | ```yaml 30 | bots: 31 | # Pathological bots to deny 32 | - # This correlates to data/bots/ai-robots-txt.yaml in the source tree 33 | import: (data)/bots/ai-robots-txt.yaml 34 | - import: (data)/bots/cloudflare-workers.yaml 35 | ``` 36 | 37 | 38 | 39 | 40 | Of note, a bot rule can either have inline bot configuration or import a bot config snippet. You cannot do both in a single bot rule. 
41 | 42 | 43 | 44 | 45 | ```json 46 | { 47 | "bots": [ 48 | { 49 | "import": "(data)/bots/ai-robots-txt.yaml", 50 | "name": "generic-browser", 51 | "user_agent_regex": "Mozilla|Opera\n", 52 | "action": "CHALLENGE" 53 | } 54 | ] 55 | } 56 | ``` 57 | 58 | 59 | 60 | 61 | ```yaml 62 | bots: 63 | - import: (data)/bots/ai-robots-txt.yaml 64 | name: generic-browser 65 | user_agent_regex: > 66 | Mozilla|Opera 67 | action: CHALLENGE 68 | ``` 69 | 70 | 71 | 72 | 73 | This will return an error like this: 74 | 75 | ```text 76 | config is not valid: 77 | config.BotOrImport: rule definition is invalid, you must set either bot rules or an import statement, not both 78 | ``` 79 | 80 | Paths can either be prefixed with `(data)` to import from the [the data folder in the Anubis source tree](https://github.com/TecharoHQ/anubis/tree/main/data) or anywhere on the filesystem. If you don't have access to the Anubis source tree, check /usr/share/docs/anubis/data or in the tarball you extracted Anubis from. 81 | 82 | ## Writing snippets 83 | 84 | Snippets can be written in either JSON or YAML, with a preference for YAML. When writing a snippet, write the bot rules you want directly at the top level of the file in a list. 
85 | 86 | Here is an example snippet that allows [IPv6 Unique Local Addresses](https://en.wikipedia.org/wiki/Unique_local_address) through Anubis: 87 | 88 | 89 | 90 | 91 | ```json 92 | [ 93 | { 94 | "name": "ipv6-ula", 95 | "action": "ALLOW", 96 | "remote_addresses": ["fc00::/7"] 97 | } 98 | ] 99 | ``` 100 | 101 | 102 | 103 | 104 | ```yaml 105 | - name: ipv6-ula 106 | action: ALLOW 107 | remote_addresses: 108 | - fc00::/7 109 | ``` 110 | 111 | 112 | 113 | 114 | ## Extracting Anubis' embedded filesystem 115 | 116 | You can always extract the list of rules embedded into the Anubis binary with this command: 117 | 118 | ```text 119 | anubis --extract-resources=static 120 | ``` 121 | 122 | This will dump the contents of Anubis' embedded data to a new folder named `static`: 123 | 124 | ```text 125 | static 126 | ├── apps 127 | │ └── gitea-rss-feeds.yaml 128 | ├── botPolicies.json 129 | ├── botPolicies.yaml 130 | ├── bots 131 | │ ├── ai-robots-txt.yaml 132 | │ ├── cloudflare-workers.yaml 133 | │ ├── headless-browsers.yaml 134 | │ └── us-ai-scraper.yaml 135 | ├── common 136 | │ ├── allow-private-addresses.yaml 137 | │ └── keep-internet-working.yaml 138 | └── crawlers 139 | ├── bingbot.yaml 140 | ├── duckduckbot.yaml 141 | ├── googlebot.yaml 142 | ├── internet-archive.yaml 143 | ├── kagibot.yaml 144 | ├── marginalia.yaml 145 | ├── mojeekbot.yaml 146 | └── qwantbot.yaml 147 | ``` 148 | -------------------------------------------------------------------------------- /docs/docs/admin/configuration/open-graph.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | id: open-graph 3 | title: Open Graph Configuration 4 | --- 5 | 6 | # Open Graph Configuration 7 | 8 | This page provides detailed information on how to configure [OpenGraph tag](https://ogp.me/) passthrough in Anubis. This enables social previews of resources protected by Anubis without having to exempt each scraper individually. 
9 | 10 | ## Configuration Options 11 | 12 | | Name | Description | Type | Default | Example | 13 | |------------------|-----------------------------------------------------------|----------|---------|-------------------------| 14 | | `OG_PASSTHROUGH` | Enables or disables the Open Graph tag passthrough system | Boolean | `false` | `OG_PASSTHROUGH=true` | 15 | | `OG_EXPIRY_TIME` | Configurable cache expiration time for Open Graph tags | Duration | `24h` | `OG_EXPIRY_TIME=1h` | 16 | 17 | ## Usage 18 | 19 | To configure Open Graph tags, you can set the following environment variables, environment file or as flags in your Anubis configuration: 20 | 21 | ```sh 22 | export OG_PASSTHROUGH=true 23 | export OG_EXPIRY_TIME=1h 24 | ``` 25 | 26 | ## Implementation Details 27 | 28 | When `OG_PASSTHROUGH` is enabled, Anubis will: 29 | 30 | 1. Check a local cache for the requested URL's Open Graph tags. 31 | 2. If a cached entry exists and is still valid, return the cached tags. 32 | 3. If the cached entry is stale or not found, fetch the URL, parse the Open Graph tags, update the cache, and return the new tags. 33 | 34 | The cache expiration time is controlled by `OG_EXPIRY_TIME`. 35 | 36 | ## Example 37 | 38 | Here is an example of how to configure Open Graph tags in your Anubis setup: 39 | 40 | ```sh 41 | export OG_PASSTHROUGH=true 42 | export OG_EXPIRY_TIME=1h 43 | ``` 44 | 45 | With these settings, Anubis will cache Open Graph tags for 1 hour and pass them through to the challenge page. 46 | 47 | For more information, refer to the [installation guide](../installation). 
48 | -------------------------------------------------------------------------------- /docs/docs/admin/default-allow-behavior.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Default allow behavior 3 | --- 4 | 5 | import Tabs from "@theme/Tabs"; 6 | import TabItem from "@theme/TabItem"; 7 | 8 | # Default allow behavior 9 | 10 | Anubis is designed to be as unintrusive as possible to your existing infrastructure. 11 | 12 | By default, it allows all traffic unless a request matches a rule that explicitly denies or challenges it. 13 | 14 | Only requests matching a DENY or CHALLENGE rule are blocked or challenged. All other requests are allowed. This is called "the implicit rule". 15 | 16 | ## Example: Minimal policy 17 | 18 | If your policy only blocks a specific bot, all other requests will be allowed: 19 | 20 | 21 | 22 | 23 | ```json 24 | { 25 | "bots": [ 26 | { 27 | "name": "block-amazonbot", 28 | "user_agent_regex": "Amazonbot", 29 | "action": "DENY" 30 | } 31 | ] 32 | } 33 | ``` 34 | 35 | 36 | 37 | 38 | ```yaml 39 | - name: block-amazonbot 40 | user_agent_regex: Amazonbot 41 | action: DENY 42 | ``` 43 | 44 | 45 | 46 | 47 | ## How to deny by default 48 | 49 | If you want to deny all traffic except what you explicitly allow, add a catch-all deny rule at the end of your policy list. Make sure to add ALLOW rules for any traffic you want to permit before this rule. 
50 | 51 | 52 | 53 | 54 | ```json 55 | { 56 | "bots": [ 57 | { 58 | "name": "allow-goodbot", 59 | "user_agent_regex": "GoodBot", 60 | "action": "ALLOW" 61 | }, 62 | { 63 | "name": "catch-all-deny", 64 | "path_regex": ".*", 65 | "action": "DENY" 66 | } 67 | ] 68 | } 69 | ``` 70 | 71 | 72 | 73 | 74 | ```yaml 75 | - name: allow-goodbot 76 | user_agent_regex: GoodBot 77 | action: ALLOW 78 | - name: catch-all-deny 79 | path_regex: .* 80 | action: DENY 81 | ``` 82 | 83 | 84 | 85 | 86 | ## Final remarks 87 | 88 | - Rules are evaluated in order; the first match wins. 89 | - The implicit allow rule is always last and cannot be removed. 90 | - Use your logs to monitor what traffic is being allowed by default. 91 | 92 | See [Policy Definitions](./policies) for more details on writing rules. -------------------------------------------------------------------------------- /docs/docs/admin/environments/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Environments", 3 | "position": 20, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "Detailed information about individual environments (such as HTTP servers, platforms, etc.) Anubis is known to work with." 7 | } 8 | } -------------------------------------------------------------------------------- /docs/docs/admin/environments/apache.mdx: -------------------------------------------------------------------------------- 1 | # Apache 2 | 3 | import Tabs from "@theme/Tabs"; 4 | import TabItem from "@theme/TabItem"; 5 | 6 | Anubis is intended to be a filter proxy. The way to integrate this is to break your configuration up into two parts: TLS termination and then HTTP routing. 
Consider this diagram: 7 | 8 | ```mermaid 9 | --- 10 | title: Apache as tls terminator and HTTP router 11 | --- 12 | 13 | flowchart LR 14 | T(User Traffic) 15 | subgraph Apache 2 16 | TCP(TCP 80/443) 17 | US(TCP 3001) 18 | end 19 | 20 | An(Anubis) 21 | B(Backend) 22 | 23 | T --> |TLS termination| TCP 24 | TCP --> |Traffic filtering| An 25 | An --> |Happy traffic| US 26 | US --> |whatever you're doing| B 27 | ``` 28 | 29 | Effectively you have one trip through Apache to do TLS termination, a detour through Anubis for traffic scrubbing, and then going to the backend directly. This final socket is what will do HTTP routing. 30 | 31 | :::note 32 | 33 | These examples assume that you are using a setup where your Apache configuration is made up of a bunch of files in `/etc/httpd/conf.d/*.conf`. This is not true for all deployments of Apache. If you are not in such an environment, append these snippets to your `/etc/httpd/conf/httpd.conf` file. 34 | 35 | ::: 36 | 37 | ## Dependencies 38 | 39 | Install the following dependencies for proxying HTTP: 40 | 41 | 42 | 43 | 44 | ```text 45 | dnf -y install mod_proxy_html 46 | ``` 47 | 48 | 49 | 50 | 51 | ```text 52 | apt-get install -y libapache2-mod-proxy-html libxml2-dev 53 | ``` 54 | 55 | 56 | 57 | 58 | ## Configuration 59 | 60 | Assuming you are protecting `anubistest.techaro.lol`, you need the following server configuration blocks: 61 | 62 | 1. A block on port 80 that forwards HTTP to HTTPS 63 | 2. A block on port 443 that terminates TLS and forwards to Anubis 64 | 3. 
A block on port 3001 that actually serves your websites 65 | 66 | ```text 67 | # Plain HTTP redirect to HTTPS 68 | 69 | ServerAdmin your@email.here 70 | ServerName anubistest.techaro.lol 71 | DocumentRoot /var/www/anubistest.techaro.lol 72 | ErrorLog /var/log/httpd/anubistest.techaro.lol_error.log 73 | CustomLog /var/log/httpd/anubistest.techaro.lol_access.log combined 74 | RewriteEngine on 75 | RewriteCond %{SERVER_NAME} =anubistest.techaro.lol 76 | RewriteRule ^ https://%{SERVER_NAME}%{REQUEST_URI} [END,NE,R=permanent] 77 | 78 | 79 | # HTTPS listener that forwards to Anubis 80 | 81 | ServerAdmin your@email.here 82 | ServerName anubistest.techaro.lol 83 | DocumentRoot /var/www/anubistest.techaro.lol 84 | ErrorLog /var/log/httpd/anubistest.techaro.lol_error.log 85 | CustomLog /var/log/httpd/anubistest.techaro.lol_access.log combined 86 | 87 | SSLCertificateFile /etc/letsencrypt/live/anubistest.techaro.lol/fullchain.pem 88 | SSLCertificateKeyFile /etc/letsencrypt/live/anubistest.techaro.lol/privkey.pem 89 | Include /etc/letsencrypt/options-ssl-apache.conf 90 | 91 | # These headers need to be set or else Anubis will 92 | # throw an "admin misconfiguration" error. 
93 | RequestHeader set "X-Real-Ip" expr=%{REMOTE_ADDR} 94 | RequestHeader set X-Forwarded-Proto "https" 95 | 96 | ProxyPreserveHost On 97 | 98 | ProxyRequests Off 99 | ProxyVia Off 100 | 101 | # Replace 9000 with the port Anubis listens on 102 | ProxyPass / http://[::1]:9000/ 103 | ProxyPassReverse / http://[::1]:9000/ 104 | 105 | 106 | 107 | # Actual website config 108 | 109 | ServerAdmin your@email.here 110 | ServerName anubistest.techaro.lol 111 | DocumentRoot /var/www/anubistest.techaro.lol 112 | ErrorLog /var/log/httpd/anubistest.techaro.lol_error.log 113 | CustomLog /var/log/httpd/anubistest.techaro.lol_access.log combined 114 | 115 | ``` 116 | 117 | Make sure to add a separate configuration file for the listener on port 3001: 118 | 119 | ```text 120 | # /etc/httpd/conf.d/listener-3001.conf 121 | 122 | Listen 3001 123 | ``` 124 | 125 | This can be repeated for multiple sites. Anubis does not care about the HTTP `Host` header and will happily cope with multiple websites via the same instance. 126 | 127 | Then reload your Apache config and load your website. You should see Anubis protecting your apps! 128 | 129 | ```text 130 | sudo systemctl reload httpd.service 131 | ``` 132 | 133 | ## Troubleshooting 134 | 135 | Here are some answers to questions that came in in testing: 136 | 137 | ### I'm running on a Red Hat distribution and Apache is saying "service unavailable" for every page load 138 | 139 | If you see a "Service unavailable" error on every page load and run a Red Hat derived distribution, you are missing a `selinux` setting. The exact command will be in a journalctl log message like this: 140 | 141 | ```text 142 | ***** Plugin catchall_boolean (89.3 confidence) suggests ****************** 143 | 144 | If you want to allow HTTPD scripts and modules to connect to the network using TCP. 145 | Then you must tell SELinux about this by enabling the 'httpd_can_network_connect' boolean. 
146 | 147 | Do 148 | setsebool -P httpd_can_network_connect 1 149 | ``` 150 | 151 | This will fix the error immediately. 152 | -------------------------------------------------------------------------------- /docs/docs/admin/environments/docker-compose.mdx: -------------------------------------------------------------------------------- 1 | # Docker compose 2 | 3 | Docker compose is typically used in concert with other load balancers such as [Apache](./apache.mdx) or [Nginx](./nginx.mdx). Below is a minimal example showing you how to set up an instance of Anubis listening on host port 8080 that points to a static website containing data in `./www`: 4 | 5 | ```yaml 6 | services: 7 | anubis-nginx: 8 | image: ghcr.io/techarohq/anubis:latest 9 | environment: 10 | BIND: ":8080" 11 | DIFFICULTY: "4" 12 | METRICS_BIND: ":9090" 13 | SERVE_ROBOTS_TXT: "true" 14 | TARGET: "http://nginx" 15 | POLICY_FNAME: "/data/cfg/botPolicy.yaml" 16 | OG_PASSTHROUGH: "true" 17 | OG_EXPIRY_TIME: "24h" 18 | ports: 19 | - 8080:8080 20 | volumes: 21 | - "./botPolicy.yaml:/data/cfg/botPolicy.yaml:ro" 22 | nginx: 23 | image: nginx 24 | volumes: 25 | - "./www:/usr/share/nginx/html" 26 | ``` 27 | -------------------------------------------------------------------------------- /docs/docs/admin/environments/kubernetes.mdx: -------------------------------------------------------------------------------- 1 | # Kubernetes 2 | 3 | When setting up Anubis in Kubernetes, you want to make sure that you thread requests through Anubis kinda like this: 4 | 5 | ```mermaid 6 | --- 7 | title: Anubis embedded into workload pods 8 | --- 9 | 10 | flowchart LR 11 | T(User Traffic) 12 | 13 | IngressController(IngressController) 14 | 15 | subgraph Service 16 | AnPort(Anubis Port) 17 | BPort(Backend Port) 18 | end 19 | 20 | subgraph Pod 21 | An(Anubis) 22 | B(Backend) 23 | end 24 | 25 | T --> IngressController 26 | IngressController --> AnPort 27 | AnPort --> An 28 | An --> B 29 | ``` 30 | 31 | Anubis is lightweight 
enough that you should be able to have many instances of it running without many problems. If this is a concern for you, please check out [ingress-anubis](https://github.com/jaredallard/ingress-anubis?ref=anubis.techaro.lol). 32 | 33 | This example makes the following assumptions: 34 | 35 | - Your target service is listening on TCP port `5000`. 36 | - Anubis will be listening on port `8080`. 37 | 38 | Adjust these values as facts and circumstances demand. 39 | 40 | Create a secret with the signing key Anubis should use for its responses: 41 | 42 | ``` 43 | kubectl create secret generic anubis-key \ 44 | --namespace default \ 45 | --from-literal=ED25519_PRIVATE_KEY_HEX=$(openssl rand -hex 32) 46 | ``` 47 | 48 | Attach Anubis to your Deployment: 49 | 50 | ```yaml 51 | containers: 52 | # ... 53 | - name: anubis 54 | image: ghcr.io/techarohq/anubis:latest 55 | imagePullPolicy: Always 56 | env: 57 | - name: "BIND" 58 | value: ":8080" 59 | - name: "DIFFICULTY" 60 | value: "4" 61 | - name: ED25519_PRIVATE_KEY_HEX 62 | valueFrom: 63 | secretKeyRef: 64 | name: anubis-key 65 | key: ED25519_PRIVATE_KEY_HEX 66 | - name: "METRICS_BIND" 67 | value: ":9090" 68 | - name: "SERVE_ROBOTS_TXT" 69 | value: "true" 70 | - name: "TARGET" 71 | value: "http://localhost:5000" 72 | - name: "OG_PASSTHROUGH" 73 | value: "true" 74 | - name: "OG_EXPIRY_TIME" 75 | value: "24h" 76 | resources: 77 | limits: 78 | cpu: 750m 79 | memory: 256Mi 80 | requests: 81 | cpu: 250m 82 | memory: 256Mi 83 | securityContext: 84 | runAsUser: 1000 85 | runAsGroup: 1000 86 | runAsNonRoot: true 87 | allowPrivilegeEscalation: false 88 | capabilities: 89 | drop: 90 | - ALL 91 | seccompProfile: 92 | type: RuntimeDefault 93 | ``` 94 | 95 | Then add a Service entry for Anubis: 96 | 97 | ```yaml 98 | # ... 
99 | spec: 100 | ports: 101 | # diff-add 102 | - protocol: TCP 103 | # diff-add 104 | port: 8080 105 | # diff-add 106 | targetPort: 8080 107 | # diff-add 108 | name: anubis 109 | ``` 110 | 111 | Then point your Ingress to the Anubis port: 112 | 113 | ```yaml 114 | rules: 115 | - host: git.xeserv.us 116 | http: 117 | paths: 118 | - pathType: Prefix 119 | path: "/" 120 | backend: 121 | service: 122 | name: git 123 | port: 124 | # diff-remove 125 | name: http 126 | # diff-add 127 | name: anubis 128 | ``` 129 | -------------------------------------------------------------------------------- /docs/docs/admin/environments/nginx.mdx: -------------------------------------------------------------------------------- 1 | # Nginx 2 | 3 | Anubis is intended to be a filter proxy. The way to integrate this with nginx is to break your configuration up into two parts: TLS termination and then HTTP routing. Consider this diagram: 4 | 5 | ```mermaid 6 | --- 7 | title: Nginx as tls terminator and HTTP router 8 | --- 9 | 10 | flowchart LR 11 | T(User Traffic) 12 | subgraph Nginx 13 | TCP(TCP 80/443) 14 | US(Unix Socket or 15 | another TCP port) 16 | end 17 | 18 | An(Anubis) 19 | B(Backend) 20 | 21 | T --> |TLS termination| TCP 22 | TCP --> |Traffic filtering| An 23 | An --> |Happy traffic| US 24 | US --> |whatever you're doing| B 25 | ``` 26 | 27 | Instead of your traffic going right from TLS termination into the backend, it takes a detour through Anubis. Anubis filters out the "bad" traffic and then passes the "good" traffic to another socket that Nginx has open. This final socket is what you will use to do HTTP routing. 28 | 29 | Effectively, you have two roles for nginx: TLS termination (converting HTTPS to HTTP) and HTTP routing (distributing requests to the individual vhosts). This can stack with something like Apache in case you have a legacy deployment. 
Make sure you have the right [TLS certificates configured](https://code.kuederle.com/letsencrypt/) at the TLS termination level. 30 | 31 | :::note 32 | 33 | These examples assume that you are using a setup where your nginx configuration is made up of a bunch of files in `/etc/nginx/conf.d/*.conf`. This is not true for all deployments of nginx. If you are not in such an environment, append these snippets to your `/etc/nginx/nginx.conf` file. 34 | 35 | ::: 36 | 37 | Assuming that we are protecting `anubistest.techaro.lol`, here's what the server configuration file would look like: 38 | 39 | ```nginx 40 | # /etc/nginx/conf.d/server-anubistest-techaro-lol.conf 41 | 42 | # HTTP - Redirect all HTTP traffic to HTTPS 43 | server { 44 | listen 80; 45 | listen [::]:80; 46 | 47 | server_name anubistest.techaro.lol; 48 | 49 | location / { 50 | return 301 https://$host$request_uri; 51 | } 52 | } 53 | 54 | # TLS termination server, this will listen over TLS (https) and then 55 | # proxy all traffic to the target via Anubis. 56 | server { 57 | # Listen on TCP port 443 with TLS (https) and HTTP/2 58 | listen 443 ssl http2; 59 | listen [::]:443 ssl http2; 60 | 61 | location / { 62 | proxy_set_header Host $host; 63 | proxy_set_header X-Real-IP $remote_addr; 64 | proxy_pass http://anubis; 65 | } 66 | 67 | server_name anubistest.techaro.lol; 68 | 69 | ssl_certificate /path/to/your/certs/anubistest.techaro.lol.crt; 70 | ssl_certificate_key /path/to/your/certs/anubistest.techaro.lol.key; 71 | } 72 | 73 | # Backend server, this is where your webapp should actually live. 74 | server { 75 | listen unix:/run/nginx/nginx.sock; 76 | 77 | server_name anubistest.techaro.lol; 78 | root "/srv/http/anubistest.techaro.lol"; 79 | index index.html; 80 | 81 | # Your normal configuration can go here 82 | # location .php { fastcgi...} etc. 
83 | } 84 | ``` 85 | 86 | :::tip 87 | 88 | You can copy the `location /` block into a separate file named something like `conf-anubis.inc` and then include it inline to other `server` blocks: 89 | 90 | ```nginx 91 | # /etc/nginx/conf.d/conf-anubis.inc 92 | 93 | # Forward to anubis 94 | location / { 95 | proxy_set_header Host $host; 96 | proxy_set_header X-Real-IP $remote_addr; 97 | proxy_pass http://anubis; 98 | } 99 | ``` 100 | 101 | Then in a server block: 102 | 103 |
104 | Full nginx config 105 | 106 | ```nginx 107 | # /etc/nginx/conf.d/server-mimi-techaro-lol.conf 108 | 109 | server { 110 | # Listen on 443 with SSL 111 | listen 443 ssl http2; 112 | listen [::]:443 ssl http2; 113 | 114 | # Slipstream via Anubis 115 | include "conf-anubis.inc"; 116 | 117 | server_name mimi.techaro.lol; 118 | 119 | ssl_certificate /path/to/your/certs/mimi.techaro.lol.crt; 120 | ssl_certificate_key /path/to/your/certs/mimi.techaro.lol.key; 121 | } 122 | 123 | server { 124 | listen unix:/run/nginx/nginx.sock; 125 | 126 | server_name mimi.techaro.lol; 127 | root "/srv/http/mimi.techaro.lol"; 128 | index index.html; 129 | 130 | # Your normal configuration can go here 131 | # location .php { fastcgi...} etc. 132 | } 133 | ``` 134 | 135 |
136 | 137 | ::: 138 | 139 | Create an upstream for Anubis. 140 | 141 | ```nginx 142 | # /etc/nginx/conf.d/upstream-anubis.conf 143 | 144 | upstream anubis { 145 | # Make sure this matches the values you set for `BIND` and `BIND_NETWORK`. 146 | # If this does not match, your services will not be protected by Anubis. 147 | 148 | # Try anubis first over a UNIX socket 149 | server unix:/run/anubis/nginx.sock; 150 | #server http://127.0.0.1:8923; 151 | 152 | # Optional: fall back to serving the websites directly. This allows your 153 | # websites to be resilient against Anubis failing, at the risk of exposing 154 | # them to the raw internet without protection. This is a tradeoff and can 155 | # be worth it in some edge cases. 156 | #server unix:/run/nginx.sock backup; 157 | } 158 | ``` 159 | 160 | This can be repeated for multiple sites. Anubis does not care about the HTTP `Host` header and will happily cope with multiple websites via the same instance. 161 | 162 | Then reload your nginx config and load your website. You should see Anubis protecting your apps! 163 | 164 | ```text 165 | sudo systemctl reload nginx.service 166 | ``` 167 | -------------------------------------------------------------------------------- /docs/docs/admin/native-install.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Installing Anubis with a native package 3 | --- 4 | 5 | import Tabs from "@theme/Tabs"; 6 | import TabItem from "@theme/TabItem"; 7 | 8 | Download the package for your system from [the most recent release on GitHub](https://github.com/TecharoHQ/anubis/releases). 
9 | 10 | Install the Anubis package using your package manager of choice: 11 | 12 | 13 | 14 | 15 | Install Anubis with `apt`: 16 | 17 | ```text 18 | sudo apt install ./anubis-$VERSION-$ARCH.deb 19 | ``` 20 | 21 | 22 | 23 | 24 | Extract the tarball to a folder: 25 | 26 | ```text 27 | tar zxf ./anubis-$VERSION-$OS-$ARCH.tar.gz 28 | cd anubis-$VERSION-$OS-$ARCH 29 | ``` 30 | 31 | Install the binary to your system: 32 | 33 | ```text 34 | sudo install -D ./bin/anubis /usr/local/bin 35 | ``` 36 | 37 | Edit the systemd unit to point to `/usr/local/bin/anubis` instead of `/usr/bin/anubis`: 38 | 39 | ```text 40 | perl -pi -e 's$/usr/bin/anubis$/usr/local/bin/anubis$g' ./run/anubis@.service 41 | ``` 42 | 43 | Install the systemd unit to your system: 44 | 45 | ```text 46 | sudo install -D ./run/anubis@.service /etc/systemd/system 47 | ``` 48 | 49 | Install the default configuration file to your system: 50 | 51 | ```text 52 | sudo install -D ./run/default.env /etc/anubis 53 | ``` 54 | 55 | 56 | 57 | 58 | Install Anubis with `dnf`: 59 | 60 | ```text 61 | sudo dnf -y install ./anubis-$VERSION.$ARCH.rpm 62 | ``` 63 | 64 | OR 65 | 66 | Install Anubis with `yum`: 67 | 68 | ```text 69 | sudo yum -y install ./anubis-$VERSION.$ARCH.rpm 70 | ``` 71 | 72 | OR 73 | 74 | Install Anubis with `rpm`: 75 | 76 | ``` 77 | sudo rpm -ivh ./anubis-$VERSION.$ARCH.rpm 78 | ``` 79 | 80 | 81 | 82 | 83 | Once it's installed, make a copy of the default configuration file `/etc/anubis/default.env` based on which service you want to protect. 
For example, to protect a `gitea` server: 84 | 85 | ```text 86 | sudo cp /etc/anubis/default.env /etc/anubis/gitea.env 87 | ``` 88 | 89 | Copy the default bot policies file to `/etc/anubis/gitea.botPolicies.yaml`: 90 | 91 | 92 | 93 | 94 | ```text 95 | sudo cp /usr/share/doc/anubis/botPolicies.yaml /etc/anubis/gitea.botPolicies.yaml 96 | ``` 97 | 98 | 99 | 100 | 101 | ```text 102 | sudo cp ./doc/botPolicies.yaml /etc/anubis/gitea.botPolicies.yaml 103 | ``` 104 | 105 | 106 | 107 | 108 | 109 | Then open `gitea.env` in your favorite text editor and customize [the environment variables](./installation.mdx#environment-variables) as needed. Here's an example configuration for a Gitea server: 110 | 111 | ```sh 112 | BIND=[::1]:8239 113 | BIND_NETWORK=tcp 114 | DIFFICULTY=4 115 | METRICS_BIND=[::1]:8240 116 | METRICS_BIND_NETWORK=tcp 117 | POLICY_FNAME=/etc/anubis/gitea.botPolicies.yaml 118 | TARGET=http://localhost:3000 119 | ``` 120 | 121 | Then start Anubis with `systemctl enable --now`: 122 | 123 | ```text 124 | sudo systemctl enable --now anubis@gitea.service 125 | ``` 126 | 127 | Test to make sure it's running with `curl`: 128 | 129 | ```text 130 | curl http://localhost:8240/metrics 131 | ``` 132 | 133 | Then set up your reverse proxy (Nginx, Caddy, etc.) to point to the Anubis port. Anubis will then reverse proxy all requests that meet the policies in `/etc/anubis/gitea.botPolicies.yaml` to the target service. 134 | 135 | For more details on particular reverse proxies, see here: 136 | 137 | - [Apache](./environments/apache.mdx) 138 | - [Nginx](./environments/nginx.mdx) 139 | -------------------------------------------------------------------------------- /docs/docs/design/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Design", 3 | "position": 10, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "How Anubis is designed and the tradeoffs it makes. 
7 | } 8 | } -------------------------------------------------------------------------------- /docs/docs/design/how-anubis-works.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: How Anubis works 3 | --- 4 | 5 | Anubis uses a proof-of-work challenge to ensure that clients are using a modern browser and are able to calculate SHA-256 checksums. Anubis has a customizable difficulty for this proof-of-work challenge, but defaults to 5 leading zeroes. 6 | 7 | ```mermaid 8 | --- 9 | title: Challenge generation and validation 10 | --- 11 | 12 | flowchart TD 13 | Backend("Backend") 14 | Fail("Fail") 15 | 16 | style PresentChallenge color:#FFFFFF, fill:#AA00FF, stroke:#AA00FF 17 | style ValidateChallenge color:#FFFFFF, fill:#AA00FF, stroke:#AA00FF 18 | style Backend color:#FFFFFF, stroke:#00C853, fill:#00C853 19 | style Fail color:#FFFFFF, stroke:#FF2962, fill:#FF2962 20 | 21 | subgraph Server 22 | PresentChallenge("Present Challenge") 23 | ValidateChallenge("Validate Challenge") 24 | end 25 | 26 | subgraph Client 27 | Main("main.mjs") 28 | Worker("Worker") 29 | end 30 | 31 | Main -- Request challenge --> PresentChallenge 32 | PresentChallenge -- Return challenge & difficulty --> Main 33 | Main -- Spawn worker --> Worker 34 | Worker -- Successful challenge --> Main 35 | Main -- Validate challenge --> ValidateChallenge 36 | ValidateChallenge -- Return cookie --> Backend 37 | ValidateChallenge -- If anything is wrong --> Fail 38 | ``` 39 | 40 | ### Challenge presentation 41 | 42 | Anubis decides to present a challenge using this logic: 43 | 44 | - User-Agent contains `"Mozilla"` 45 | - Request path is not in `/.well-known`, `/robots.txt`, or `/favicon.ico` 46 | - Request path is not obviously an RSS feed (ends with `.rss`, `.xml`, or `.atom`) 47 | 48 | This should ensure that git clients, RSS readers, and other low-harm clients can get through without issue, but high-risk clients such as browsers and AI scraper bots will 
get blocked. 49 | 50 | ```mermaid 51 | --- 52 | title: Challenge presentation logic 53 | --- 54 | 55 | flowchart LR 56 | Request("Request") 57 | Backend("Backend") 58 | %%Fail("Fail") 59 | PresentChallenge("Present 60 | challenge") 61 | HasMozilla{"Is browser 62 | or scraper?"} 63 | HasCookie{"Has cookie?"} 64 | HasExpired{"Cookie expired?"} 65 | HasSignature{"Has valid 66 | signature?"} 67 | RandomJitter{"Secondary 68 | screening?"} 69 | POWPass{"Proof of 70 | work valid?"} 71 | 72 | style PresentChallenge color:#FFFFFF, fill:#AA00FF, stroke:#AA00FF 73 | style Backend color:#FFFFFF, stroke:#00C853, fill:#00C853 74 | %%style Fail color:#FFFFFF, stroke:#FF2962, fill:#FF2962 75 | 76 | Request --> HasMozilla 77 | HasMozilla -- Yes --> HasCookie 78 | HasMozilla -- No --> Backend 79 | HasCookie -- Yes --> HasExpired 80 | HasCookie -- No --> PresentChallenge 81 | HasExpired -- Yes --> PresentChallenge 82 | HasExpired -- No --> HasSignature 83 | HasSignature -- Yes --> RandomJitter 84 | HasSignature -- No --> PresentChallenge 85 | RandomJitter -- Yes --> POWPass 86 | RandomJitter -- No --> Backend 87 | POWPass -- Yes --> Backend 88 | POWPass -- No --> PresentChallenge 89 | PresentChallenge -- Back again for another cycle --> Request 90 | ``` 91 | 92 | ### Proof of passing challenges 93 | 94 | When a client passes a challenge, Anubis sets an HTTP cookie named `"within.website-x-cmd-anubis-auth"` containing a signed [JWT](https://jwt.io/) (JSON Web Token). 
This JWT contains the following claims: 95 | 96 | - `challenge`: The challenge string derived from user request metadata 97 | - `nonce`: The nonce / iteration number used to generate the passing response 98 | - `response`: The hash that passed Anubis' checks 99 | - `iat`: When the token was issued 100 | - `nbf`: One minute prior to when the token was issued 101 | - `exp`: The token's expiry, one week after the token was issued 102 | 103 | This ensures that the token has enough metadata to prove that the token is valid (due to the token's signature), but also so that the server can independently prove the token is valid. This cookie is allowed to be set without triggering an EU cookie banner notification; but depending on facts and circumstances, you may wish to disclose this to your users. 104 | 105 | ### Challenge format 106 | 107 | Challenges are formed by taking some user request metadata and using that to generate a SHA-256 checksum. The following request headers are used: 108 | 109 | - `Accept-Encoding`: The content encodings that the requestor supports, such as gzip. 110 | - `Accept-Language`: The language that the requestor would prefer the server respond in, such as English. 111 | - `X-Real-Ip`: The IP address of the requestor, as set by a reverse proxy server. 112 | - `User-Agent`: The user agent string of the requestor. 113 | - The current time in UTC rounded to the nearest week. 114 | - The fingerprint (checksum) of Anubis' private ED25519 key. 115 | 116 | This forms a fingerprint of the requestor using metadata that any requestor already is sending. It also uses time as an input, which is known to both the server and requestor due to the nature of linear timelines. Depending on facts and circumstances, you may wish to disclose this to your users. 117 | 118 | ### JWT signing 119 | 120 | Anubis uses an ed25519 keypair to sign the JWTs issued when challenges are passed. Anubis will generate a new ed25519 keypair every time it starts. 
At this time, there is no way to share this keypair between instance of Anubis, but that will be addressed in future versions. 121 | -------------------------------------------------------------------------------- /docs/docs/design/why-proof-of-work.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: Why does Anubis use Proof-of-Work? 3 | --- 4 | 5 | Anubis uses a [proof of work](https://en.wikipedia.org/wiki/Proof_of_work) in order to validate that clients are genuine. The reason Anubis does this was inspired by [Hashcash](https://en.wikipedia.org/wiki/Hashcash), a suggestion from the early 2000's about extending the email protocol to avoid spam. The idea is that genuine people sending emails will have to do a small math problem that is expensive to compute, but easy to verify such as hashing a string with a given number of leading zeroes. This will have basically no impact on individuals sending a few emails a week, but the company churning out industrial quantities of advertising will be required to do prohibitively expensive computation. This is also how Bitcoin's consensus algorithm works. 6 | 7 | ## How Anubis' proof of work scheme works 8 | 9 | A sha256 hash is a bunch of bytes like this: 10 | 11 | ```text 12 | 394d1cc82924c2368d4e34fa450c6b30d5d02f8ae4bb6310e2296593008ff89f 13 | ``` 14 | 15 | We usually write it out in hex form, but that's literally what the bytes in ram look like. In a proof of work validation system, you take some base value (the "challenge") and a constantly incrementing number (the "nonce"), so the thing you end up hashing is this: 16 | 17 | ```js 18 | const hash = await sha256(`${challenge}${nonce}`); 19 | ``` 20 | 21 | In order to pass a challenge, the `hash` has to have the right number of leading zeros (the "difficulty"). When a client requests to pass the challenge, they include the nonce they used. 
The server then only has to do one sha256 operation: the one that confirms that the challenge (generated from request metadata) and the nonce (provided by the client) match the difficulty number of leading zeroes. 22 | 23 | Ultimately, this is a hack whose real purpose is to give a "good enough" placeholder solution so that more time can be spent on fingerprinting and identifying headless browsers (EG via how they do font rendering) so that the challenge proof of work page doesn't need to be presented to known legitimate users. 24 | 25 | ## Challenge format 26 | 27 | Anubis generates challenges based on browser metadata, including but not limited to the following: 28 | 29 | - The contents of your [`Accept-Language` header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Accept-Language) 30 | - The IP address of your client 31 | - Your browser's [`User-Agent` string](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent) 32 | - The date of the current week, rooted on Sundays 33 | - Anubis' ed25519 public signing key for [JSON web tokens](https://jwt.io/) (JWTs) 34 | - The challenge difficulty 35 | 36 | This is intended to be a random value that is difficult for attackers to forge and guess, but also deterministic enough that it will naturally reset itself. 37 | -------------------------------------------------------------------------------- /docs/docs/developer/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "Developer guides", 3 | "position": 50, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "Guides and suggestions to make Anubis development go smoothly for everyone." 
7 | } 8 | } -------------------------------------------------------------------------------- /docs/docs/developer/building-anubis.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Building Anubis without Docker 3 | --- 4 | 5 | :::note 6 | 7 | These instructions may work, but for right now they are informative for downstream packagers more than they are ready-made instructions for administrators wanting to run Anubis on their servers. Pre-made binary package support is being tracked in [#156](https://github.com/TecharoHQ/anubis/issues/156). 8 | 9 | ::: 10 | 11 | ## Entirely from source 12 | 13 | If you are doing a build entirely from source, here's what you need to do: 14 | 15 | :::info 16 | 17 | If you maintain a package for Anubis v1.15.x or older, you will need to update your package build. You may want to use one of the half-baked tarballs if your distro/environment of choice makes it difficult to use npm. 18 | 19 | ::: 20 | 21 | ### Tools needed 22 | 23 | In order to build a production-ready binary of Anubis, you need the following packages in your environment: 24 | 25 | - [Go](https://go.dev) at least version 1.24 - the programming language that Anubis is written in 26 | - [esbuild](https://esbuild.github.io/) - the JavaScript bundler Anubis uses for its production JS assets 27 | - [Node.JS & NPM](https://nodejs.org/en) - manages some build dependencies 28 | - `gzip` - compresses production JS (part of coreutils) 29 | - `zstd` - compresses production JS 30 | - `brotli` - compresses production JS 31 | 32 | To upgrade your version of Go without system package manager support, install `golang.org/dl/go1.24.2` (this can be done from any version of Go): 33 | 34 | ```text 35 | go install golang.org/dl/go1.24.2@latest 36 | go1.24.2 download 37 | ``` 38 | 39 | ### Install dependencies 40 | 41 | ```text 42 | make deps 43 | ``` 44 | 45 | This will download Go and NPM dependencies. 
46 | 47 | ### Building static assets 48 | 49 | ```text 50 | make assets 51 | ``` 52 | 53 | This will build all static assets (CSS, JavaScript) for distribution. 54 | 55 | ### Building Anubis to the `./var` folder 56 | 57 | ```text 58 | make build 59 | ``` 60 | 61 | From this point it is up to you to make sure that `./var/anubis` ends up in the right place. You may want to consult the `./run` folder for useful files such as a systemd unit and `anubis.env.default` file. 62 | 63 | ## "Pre-baked" tarball 64 | 65 | The `anubis-src-with-vendor` tarball has many pre-build steps already done, including: 66 | 67 | - Go module dependencies are present in `./vendor` 68 | - Static assets (JS, CSS, etc.) are already built in CI 69 | 70 | This means you do not have to manage Go, NPM, or other ecosystem dependencies. 71 | 72 | When using this tarball, all you need to do is build `./cmd/anubis`: 73 | 74 | ```text 75 | make prebaked-build 76 | ``` 77 | 78 | Anubis will be built to `./var/anubis`. 79 | 80 | ## Development dependencies 81 | 82 | Optionally, you can install the following dependencies for development: 83 | 84 | - [Staticcheck](https://staticcheck.dev/docs/getting-started/) (optional, not required due to [`go tool staticcheck`](https://www.alexedwards.net/blog/how-to-manage-tool-dependencies-in-go-1.24-plus), but required if you are using any version of Go older than 1.24) 85 | -------------------------------------------------------------------------------- /docs/docs/developer/code-quality.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Code quality guidelines 3 | --- 4 | 5 | When submitting code to Anubis, please take the time to consider the fact that this project is security software. If things go bad, bots can pummel sites into oblivion. This is not ideal for uptime. 6 | 7 | As such, code reviews will be a bit more strict than you have seen in other projects. 
This is not people trying to be mean, this is a side effect of taking the problem seriously. 8 | 9 | When making code changes, try to do the following: 10 | 11 | - If you're submitting a bugfix, add a test case for it 12 | - If you're changing the JavaScript, make sure the integration tests pass (`npm run test:integration`) 13 | 14 | ## Commit messages 15 | 16 | Anubis follows the Go project's conventions for commit messages. In general, an ideal commit message should read like this: 17 | 18 | ```text 19 | path/to/folder: brief description of the change 20 | 21 | If the change is subtle, has implementation consequences, or is otherwise 22 | not entirely self-describing: take the time to spell out why. If things 23 | are very subtle, please also amend the documentation accordingly 24 | ``` 25 | 26 | The subject of a commit message should be the second half of the sentence "This commit changes the Anubis project to:". Here's a few examples: 27 | 28 | - `disable DroneBL by default` 29 | - `port the challenge to WebAssembly` 30 | 31 | The extended commit message is also your place to give rationale for a new feature. When maintainers are reviewing your code, they will use this to figure out if the burden from feature maintainership is worth the merge. 
32 | -------------------------------------------------------------------------------- /docs/docs/developer/local-dev.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Local development 3 | --- 4 | 5 | :::note 6 | 7 | TL;DR: `npm ci && npm run dev` 8 | 9 | ::: 10 | 11 | Anubis requires the following tools to be installed to do local development: 12 | 13 | - [Go](https://go.dev) - the programming language that Anubis is written in 14 | - [esbuild](https://esbuild.github.io/) - the JavaScript bundler Anubis uses for its production JS assets 15 | - [Node.JS & NPM](https://nodejs.org/en) - manages some build dependencies 16 | - `gzip` - compresses production JS (part of coreutils) 17 | - `zstd` - compresses production JS 18 | - `brotli` - compresses production JS 19 | 20 | If you have [Homebrew](https://brew.sh) installed, you can install all the dependencies with one command: 21 | 22 | ```text 23 | brew bundle 24 | ``` 25 | 26 | If you don't, you may need to figure out equivalents to the packages in Homebrew. 27 | 28 | ## Running Anubis locally 29 | 30 | ```text 31 | npm run dev 32 | ``` 33 | 34 | Or to do it manually: 35 | 36 | - Run `npm run assets` every time you change the CSS/JavaScript 37 | - `go run ./cmd/anubis` with any CLI flags you want 38 | 39 | ## Building JS/CSS assets 40 | 41 | ```text 42 | npm run assets 43 | ``` 44 | 45 | If you change the build process, make sure to update `build.sh` accordingly. 46 | 47 | ## Production-ready builds 48 | 49 | ```text 50 | npm run container 51 | ``` 52 | 53 | This builds a prod-ready container image with [ko](https://ko.build). 
If you want to change where the container image is pushed, you need to use environment variables: 54 | 55 | ```text 56 | DOCKER_REPO=registry.host/org/repo DOCKER_METADATA_OUTPUT_TAGS=registry.host/org/repo:latest npm run container 57 | ``` 58 | 59 | ## Building packages 60 | 61 | For more information, see [Building native packages is complicated](https://xeiaso.net/blog/2025/anubis-packaging/) and [#156: Debian, RPM, and binary tarball packages](https://github.com/TecharoHQ/anubis/issues/156). 62 | 63 | Install `yeet`: 64 | 65 | :::note 66 | 67 | `yeet` will soon be moved to a dedicated TecharoHQ repository. This is currently done in a hacky way in order to get this ready for user feedback. 68 | 69 | ::: 70 | 71 | ```text 72 | go install within.website/x/cmd/yeet@v1.13.4 73 | ``` 74 | 75 | Install the dependencies for Anubis: 76 | 77 | ```text 78 | npm ci 79 | go mod download 80 | ``` 81 | 82 | Build the packages into `./var`: 83 | 84 | ```text 85 | yeet 86 | ``` 87 | -------------------------------------------------------------------------------- /docs/docs/developer/signed-commits.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Signed commits 3 | --- 4 | 5 | Anubis requires developers to sign their commits. This is done so that we can have a better chain of custody from contribution to owner. For more information about commit signing, [read here](https://www.freecodecamp.org/news/what-is-commit-signing-in-git/). 6 | 7 | We do not require GPG. SSH signed commits are fine. For an overview on how to set up commit signing with your SSH key, [read here](https://dev.to/ccoveille/git-the-complete-guide-to-sign-your-commits-with-an-ssh-key-35bg). 
8 | -------------------------------------------------------------------------------- /docs/docs/funding.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 998 3 | title: Supporting Anubis financially 4 | --- 5 | 6 | Anubis is provided to the public for free in order to help advance the common good. In return, we ask (but not demand, these are words on the internet, not word of law) that you not remove the Anubis character from your deployment. 7 | 8 | If you want to run an unbranded or white-label version of Anubis, please [contact Xe](https://xeiaso.net/contact) to arrange a contract. This is not meant to be "contact us" pricing, I am still evaluating the market for this solution and figuring out what makes sense. 9 | 10 | You can donate to the project [on Patreon](https://patreon.com/cadey) or via [GitHub Sponsors](https://github.com/sponsors/Xe). 11 | -------------------------------------------------------------------------------- /docs/docs/index.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | title: Anubis 4 | --- 5 | 6 | A smiling chibi dark-skinned anthro jackal with brown hair and tall ears looking victorious with a thumbs-up 11 | 12 | ![enbyware](https://pride-badges.pony.workers.dev/static/v1?label=enbyware&labelColor=%23555&stripeWidth=8&stripeColors=FCF434%2CFFFFFF%2C9C59D1%2C2C2C2C) 13 | ![GitHub Issues or Pull Requests by label](https://img.shields.io/github/issues/TecharoHQ/anubis) 14 | ![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/TecharoHQ/anubis) 15 | ![language count](https://img.shields.io/github/languages/count/TecharoHQ/anubis) 16 | ![repo size](https://img.shields.io/github/repo-size/TecharoHQ/anubis) 17 | 18 | Anubis [weighs the soul of your connection](https://en.wikipedia.org/wiki/Weighing_of_souls) using a sha256 proof-of-work challenge in order to protect upstream 
resources from scraper bots. 19 | 20 | This program is designed to help protect the small internet from the endless storm of requests that flood in from AI companies. Anubis is as lightweight as possible to ensure that everyone can afford to protect the communities closest to them. 21 | 22 | Anubis is a bit of a nuclear response. This will result in your website being blocked from smaller scrapers and may inhibit "good bots" like the Internet Archive. You can configure [bot policy definitions](./admin/policies.mdx) to explicitly allowlist them and we are working on a curated set of "known good" bots to allow for a compromise between discoverability and uptime. 23 | 24 | ## Support 25 | 26 | If you run into any issues running Anubis, please [open an issue](https://github.com/TecharoHQ/anubis/issues/new?template=Blank+issue) and include all the information I would need to diagnose your issue. 27 | 28 | For live chat, please join the [Patreon](https://patreon.com/cadey) and ask in the Patron discord in the channel `#anubis`. 29 | 30 | ## Star History 31 | 32 | [![Star History Chart](https://api.star-history.com/svg?repos=TecharoHQ/anubis&type=Date)](https://www.star-history.com/#TecharoHQ/anubis&Date) 33 | 34 | ## Packaging Status 35 | 36 | [![Packaging status](https://repology.org/badge/vertical-allrepos/anubis-anti-crawler.svg)](https://repology.org/project/anubis-anti-crawler/versions) 37 | 38 | ## Contributors 39 | 40 | 41 | 42 | 43 | 44 | Made with [contrib.rocks](https://contrib.rocks). 45 | -------------------------------------------------------------------------------- /docs/docs/user/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "User guides", 3 | "position": 60, 4 | "link": { 5 | "type": "generated-index", 6 | "description": "Information for users on sites that use Anubis." 
7 | } 8 | } -------------------------------------------------------------------------------- /docs/docs/user/known-broken-extensions.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: List of known browser extensions that can break Anubis 3 | --- 4 | 5 | This page contains a list of all of the browser extensions that are known to break Anubis' functionality and their associated GitHub issues, along with instructions on how to work around the issue. 6 | ## [JShelter](https://jshelter.org/) 7 | 8 | | Extension | JShelter | 9 | | :----------- | :-------------------------------------------- | 10 | | Website | [jshelter.org](https://jshelter.org/) | 11 | | GitHub issue | https://github.com/TecharoHQ/anubis/issues/25 | 12 | 13 | Workaround steps: 14 | 15 | 1. Open JShelter extension settings 16 | 2. Click on JS Shield details 17 | 3. Enter in the domain for a website protected by Anubis 18 | 4. Choose "Turn JavaScript Shield off" 19 | 5. Hit "Add to list" 20 | -------------------------------------------------------------------------------- /docs/docs/user/known-instances.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: List of known websites using Anubis 3 | --- 4 | 5 | This page contains a non-exhaustive list with all websites using Anubis. 6 | 7 | -
8 | The Linux Foundation 9 | - https://git.kernel.org/ 10 | - https://lore.kernel.org/ 11 |
12 | - https://gitlab.gnome.org/ 13 | - https://scioly.org/ 14 | - https://bugs.winehq.org/ 15 | - https://svnweb.freebsd.org/ 16 | - https://trac.ffmpeg.org/ 17 | - https://git.sr.ht/ 18 | - https://xeiaso.net/ 19 | - https://source.puri.sm/ 20 | - https://git.enlightenment.org/ 21 | - https://superlove.sayitditto.net/ 22 | - https://linktaco.com/ 23 | - https://jaredallard.dev/ 24 | - https://dev.sanctum.geek.nz/ 25 | - https://canine.tools/ 26 | - https://git.lupancham.net/ 27 | - https://dev.haiku-os.org 28 | - http://code.hackerspace.pl/ 29 | - https://wiki.archlinux.org/ 30 | - https://git.devuan.org/ 31 | 32 | -
33 | The United Nations 34 | 35 | - https://policytoolbox.iiep.unesco.org/ 36 |
37 | -------------------------------------------------------------------------------- /docs/docs/user/why-see-challenge.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Why is Anubis showing up on a website? 3 | --- 4 | 5 | You are seeing Anubis because the administrator of that website has set up [Anubis](https://github.com/TecharoHQ/anubis) to protect the server against the scourge of [AI companies aggressively scraping websites](https://thelibre.news/foss-infrastructure-is-under-attack-by-ai-companies/). This can and does cause downtime for the websites, which makes their resources inaccessible for everyone. 6 | 7 | Anubis is a compromise. Anubis uses a [proof-of-work](/docs/design/why-proof-of-work) scheme in the vein of [Hashcash](https://en.wikipedia.org/wiki/Hashcash), a proposed proof-of-work scheme for reducing email spam. The idea is that at individual scales the additional load is ignorable, but at mass scraper levels it adds up and makes scraping much more expensive. 8 | 9 | Ultimately, this is a hack whose real purpose is to give a "good enough" placeholder solution so that more time can be spent on fingerprinting and identifying headless browsers (EG: via how they do font rendering) so that the challenge proof of work page doesn't need to be presented to users that are much more likely to be legitimate. 10 | -------------------------------------------------------------------------------- /docs/docusaurus.config.ts: -------------------------------------------------------------------------------- 1 | import { themes as prismThemes } from 'prism-react-renderer'; 2 | import type { Config } from '@docusaurus/types'; 3 | import type * as Preset from '@docusaurus/preset-classic'; 4 | 5 | // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...) 
6 | 7 | const config: Config = { 8 | title: 'Anubis', 9 | tagline: 'Weigh the soul of incoming HTTP requests using proof-of-work to stop AI crawlers', 10 | favicon: 'img/favicon.ico', 11 | 12 | // Set the production url of your site here 13 | url: 'https://anubis.techaro.lol', 14 | // Set the // pathname under which your site is served 15 | // For GitHub pages deployment, it is often '//' 16 | baseUrl: '/', 17 | 18 | // GitHub pages deployment config. 19 | // If you aren't using GitHub pages, you don't need these. 20 | organizationName: 'TecharoHQ', // Usually your GitHub org/user name. 21 | projectName: 'anubis', // Usually your repo name. 22 | 23 | onBrokenLinks: 'throw', 24 | onBrokenMarkdownLinks: 'warn', 25 | 26 | // Even if you don't use internationalization, you can use this field to set 27 | // useful metadata like html lang. For example, if your site is Chinese, you 28 | // may want to replace "en" with "zh-Hans". 29 | i18n: { 30 | defaultLocale: 'en', 31 | locales: ['en'], 32 | }, 33 | 34 | markdown: { 35 | mermaid: true, 36 | }, 37 | themes: ['@docusaurus/theme-mermaid'], 38 | 39 | presets: [ 40 | [ 41 | 'classic', 42 | { 43 | docs: { 44 | sidebarPath: './sidebars.ts', 45 | // Please change this to your repo. 46 | // Remove this to remove the "edit this page" links. 47 | editUrl: 48 | 'https://github.com/TecharoHQ/anubis/tree/main/docs/', 49 | }, 50 | // blog: { 51 | // showReadingTime: true, 52 | // feedOptions: { 53 | // type: ['rss', 'atom', "json"], 54 | // xslt: true, 55 | // }, 56 | // // Please change this to your repo. 57 | // // Remove this to remove the "edit this page" links. 
58 | // editUrl: 59 | // 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/', 60 | // // Useful options to enforce blogging best practices 61 | // onInlineTags: 'warn', 62 | // onInlineAuthors: 'warn', 63 | // onUntruncatedBlogPosts: 'warn', 64 | // }, 65 | theme: { 66 | customCss: './src/css/custom.css', 67 | }, 68 | } satisfies Preset.Options, 69 | ], 70 | ], 71 | 72 | themeConfig: { 73 | colorMode: { 74 | respectPrefersColorScheme: true, 75 | }, 76 | // Replace with your project's social card 77 | image: 'img/docusaurus-social-card.jpg', 78 | navbar: { 79 | title: 'Anubis', 80 | logo: { 81 | alt: 'A happy jackal woman with brown hair and red eyes', 82 | src: 'img/favicon.webp', 83 | }, 84 | items: [ 85 | { 86 | type: 'docSidebar', 87 | sidebarId: 'tutorialSidebar', 88 | position: 'left', 89 | label: 'Tutorial', 90 | }, 91 | // { to: '/blog', label: 'Blog', position: 'left' }, 92 | { 93 | href: 'https://github.com/TecharoHQ/anubis', 94 | label: 'GitHub', 95 | position: 'right', 96 | }, 97 | ], 98 | }, 99 | footer: { 100 | style: 'dark', 101 | links: [ 102 | { 103 | title: 'Docs', 104 | items: [ 105 | { 106 | label: 'Intro', 107 | to: '/docs/', 108 | }, 109 | { 110 | label: "Installation", 111 | to: "/docs/admin/installation", 112 | }, 113 | ], 114 | }, 115 | { 116 | title: 'Community', 117 | items: [ 118 | { 119 | label: 'GitHub Discussions', 120 | href: 'https://github.com/TecharoHQ/anubis/discussions', 121 | }, 122 | { 123 | label: 'Bluesky', 124 | href: 'https://bsky.app/profile/techaro.lol', 125 | }, 126 | ], 127 | }, 128 | { 129 | title: 'More', 130 | items: [ 131 | { 132 | label: 'GitHub', 133 | href: 'https://github.com/TecharoHQ/anubis', 134 | }, 135 | ], 136 | }, 137 | ], 138 | copyright: `Copyright © ${new Date().getFullYear()} Techaro. 
Made with ❤️ in 🇨🇦.`, 139 | }, 140 | prism: { 141 | theme: prismThemes.github, 142 | darkTheme: prismThemes.dracula, 143 | magicComments: [ 144 | { 145 | className: 'code-block-diff-add-line', 146 | line: 'diff-add' 147 | }, 148 | { 149 | className: 'code-block-diff-remove-line', 150 | line: 'diff-remove' 151 | } 152 | ], 153 | }, 154 | } satisfies Preset.ThemeConfig, 155 | }; 156 | 157 | export default config; 158 | -------------------------------------------------------------------------------- /docs/manifest/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: anubis-docs 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: anubis-docs 9 | template: 10 | metadata: 11 | labels: 12 | app: anubis-docs 13 | spec: 14 | containers: 15 | - name: anubis-docs 16 | image: ghcr.io/techarohq/anubis/docs:main 17 | imagePullPolicy: Always 18 | resources: 19 | limits: 20 | memory: "128Mi" 21 | cpu: "500m" 22 | ports: 23 | - containerPort: 80 24 | - name: anubis 25 | image: ghcr.io/techarohq/anubis:main 26 | imagePullPolicy: Always 27 | env: 28 | - name: "BIND" 29 | value: ":8081" 30 | - name: "DIFFICULTY" 31 | value: "4" 32 | - name: "METRICS_BIND" 33 | value: ":9090" 34 | - name: "POLICY_FNAME" 35 | value: "" 36 | - name: "SERVE_ROBOTS_TXT" 37 | value: "false" 38 | - name: "TARGET" 39 | value: "http://localhost:80" 40 | # - name: "SLOG_LEVEL" 41 | # value: "debug" 42 | resources: 43 | limits: 44 | cpu: 500m 45 | memory: 128Mi 46 | requests: 47 | cpu: 250m 48 | memory: 128Mi 49 | securityContext: 50 | runAsUser: 1000 51 | runAsGroup: 1000 52 | runAsNonRoot: true 53 | allowPrivilegeEscalation: false 54 | capabilities: 55 | drop: 56 | - ALL 57 | seccompProfile: 58 | type: RuntimeDefault 59 | -------------------------------------------------------------------------------- /docs/manifest/ingress.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | name: anubis-docs 5 | annotations: 6 | cert-manager.io/cluster-issuer: "letsencrypt-prod" 7 | nginx.ingress.kubernetes.io/limit-rps: "10" 8 | spec: 9 | ingressClassName: nginx 10 | tls: 11 | - hosts: 12 | - anubis.techaro.lol 13 | secretName: anubis-techaro-lol-public-tls 14 | rules: 15 | - host: anubis.techaro.lol 16 | http: 17 | paths: 18 | - pathType: Prefix 19 | path: "/" 20 | backend: 21 | service: 22 | name: anubis-docs 23 | port: 24 | name: anubis -------------------------------------------------------------------------------- /docs/manifest/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - deployment.yaml 3 | - ingress.yaml 4 | - onionservice.yaml 5 | - service.yaml -------------------------------------------------------------------------------- /docs/manifest/onionservice.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: tor.k8s.torproject.org/v1alpha2 2 | kind: OnionService 3 | metadata: 4 | name: anubis-docs 5 | spec: 6 | version: 3 7 | rules: 8 | - port: 9 | number: 80 10 | backend: 11 | service: 12 | name: anubis-docs 13 | port: 14 | number: 80 -------------------------------------------------------------------------------- /docs/manifest/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: anubis-docs 5 | spec: 6 | selector: 7 | app: anubis-docs 8 | ports: 9 | - port: 80 10 | targetPort: 80 11 | name: http 12 | - port: 8081 13 | targetPort: 8081 14 | name: anubis 15 | -------------------------------------------------------------------------------- /docs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "docs", 3 | 
"version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "echo 'use CI' && exit 1", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids", 15 | "typecheck": "tsc" 16 | }, 17 | "dependencies": { 18 | "@docusaurus/core": "3.7.0", 19 | "@docusaurus/preset-classic": "3.7.0", 20 | "@docusaurus/theme-mermaid": "^3.7.0", 21 | "@mdx-js/react": "^3.0.0", 22 | "clsx": "^2.0.0", 23 | "prism-react-renderer": "^2.3.0", 24 | "react": "^19.0.0", 25 | "react-dom": "^19.0.0" 26 | }, 27 | "devDependencies": { 28 | "@docusaurus/module-type-aliases": "3.7.0", 29 | "@docusaurus/tsconfig": "3.7.0", 30 | "@docusaurus/types": "3.7.0", 31 | "typescript": "~5.6.2" 32 | }, 33 | "browserslist": { 34 | "production": [ 35 | ">0.5%", 36 | "not dead", 37 | "not op_mini all" 38 | ], 39 | "development": [ 40 | "last 3 chrome version", 41 | "last 3 firefox version", 42 | "last 5 safari version" 43 | ] 44 | }, 45 | "engines": { 46 | "node": ">=18.0" 47 | } 48 | } -------------------------------------------------------------------------------- /docs/sidebars.ts: -------------------------------------------------------------------------------- 1 | import type {SidebarsConfig} from '@docusaurus/plugin-content-docs'; 2 | 3 | // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...) 4 | 5 | /** 6 | * Creating a sidebar enables you to: 7 | - create an ordered group of docs 8 | - render a sidebar for each doc of that group 9 | - provide next/previous navigation 10 | 11 | The sidebars can be generated from the filesystem, or explicitly defined here. 12 | 13 | Create as many sidebars as you want. 
14 | */ 15 | const sidebars: SidebarsConfig = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 18 | 19 | // But you can create a sidebar manually 20 | /* 21 | tutorialSidebar: [ 22 | 'intro', 23 | 'hello', 24 | { 25 | type: 'category', 26 | label: 'Tutorial', 27 | items: ['tutorial-basics/create-a-document'], 28 | }, 29 | ], 30 | */ 31 | }; 32 | 33 | export default sidebars; 34 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures/index.tsx: -------------------------------------------------------------------------------- 1 | import type { ReactNode } from "react"; 2 | import clsx from "clsx"; 3 | import Heading from "@theme/Heading"; 4 | import styles from "./styles.module.css"; 5 | 6 | type FeatureItem = { 7 | title: string; 8 | Svg: React.ComponentType>; 9 | description: ReactNode; 10 | }; 11 | 12 | const FeatureList: FeatureItem[] = [ 13 | { 14 | title: "Easy to Use", 15 | Svg: require("@site/static/img/undraw_docusaurus_mountain.svg").default, 16 | description: ( 17 | <> 18 | Anubis is easy to set up, lightweight, and helps get rid of the lowest 19 | hanging fruit so you can sleep at night. 20 | 21 | ), 22 | }, 23 | { 24 | title: "Lightweight", 25 | Svg: require("@site/static/img/undraw_docusaurus_tree.svg").default, 26 | description: ( 27 | <> 28 | Anubis is efficient and as lightweight as possible, blocking the worst 29 | of the bots on the internet and makes it easy to protect what you host 30 | online. 31 | 32 | ), 33 | }, 34 | { 35 | title: "Multi-threaded", 36 | Svg: require("@site/static/img/undraw_docusaurus_react.svg").default, 37 | description: ( 38 | <> 39 | Anubis uses a multi-threaded proof of work check to ensure that users 40 | browsers are up to date and support modern standards. 
41 | 42 | ), 43 | }, 44 | ]; 45 | 46 | function Feature({ title, Svg, description }: FeatureItem) { 47 | return ( 48 |
49 |
50 | 51 |
52 |
53 | {title} 54 |

{description}

55 |
56 |
57 | ); 58 | } 59 | 60 | export default function HomepageFeatures(): ReactNode { 61 | return ( 62 |
63 |
64 |
65 | {FeatureList.map((props, idx) => ( 66 | 67 | ))} 68 |
69 |
70 |
71 | ); 72 | } 73 | -------------------------------------------------------------------------------- /docs/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | .features { 2 | display: flex; 3 | align-items: center; 4 | padding: 2rem 0; 5 | width: 100%; 6 | } 7 | 8 | .featureSvg { 9 | height: 200px; 10 | width: 200px; 11 | } 12 | -------------------------------------------------------------------------------- /docs/src/components/RandomKey/index.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useCallback } from "react"; 2 | import Code from "@theme/CodeInline"; 3 | import BrowserOnly from "@docusaurus/BrowserOnly"; 4 | 5 | // https://www.xaymar.com/articles/2020/12/08/fastest-uint8array-to-hex-string-conversion-in-javascript/ 6 | function toHex(buffer) { 7 | return Array.prototype.map 8 | .call(buffer, (x) => ("00" + x.toString(16)).slice(-2)) 9 | .join(""); 10 | } 11 | 12 | export const genRandomKey = (): String => { 13 | const array = new Uint8Array(32); 14 | self.crypto.getRandomValues(array); 15 | return toHex(array); 16 | }; 17 | 18 | export default function RandomKey() { 19 | return ( 20 | Loading...}> 21 | {() => { 22 | const [key, setKey] = useState(genRandomKey()); 23 | const genRandomKeyCb = useCallback(() => { 24 | setKey(genRandomKey()); 25 | }); 26 | return ( 27 | 28 | {key} 29 | 30 | 37 | 38 | ); 39 | }} 40 | 41 | ); 42 | } 43 | -------------------------------------------------------------------------------- /docs/src/css/custom.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Any CSS included here will be global. The classic template 3 | * bundles Infima by default. Infima is a CSS framework designed to 4 | * work well for content-centric websites. 5 | */ 6 | 7 | /* You can override the default Infima variables here. 
*/ 8 | :root { 9 | --ifm-color-primary: #ff5630; 10 | --ifm-color-primary-dark: #ad422a; 11 | --ifm-color-primary-darker: #8f3521; 12 | --ifm-color-primary-darkest: #592115; 13 | --ifm-color-primary-light: #ff7152; 14 | --ifm-color-primary-lighter: #ff9178; 15 | --ifm-color-primary-lightest: #ffb09e; 16 | --ifm-code-font-size: 95%; 17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); 18 | --code-block-diff-add-line-color: #ccffd8; 19 | --code-block-diff-remove-line-color: #ffebe9; 20 | } 21 | 22 | /* For readability concerns, you should choose a lighter palette in dark mode. */ 23 | [data-theme="dark"] { 24 | --ifm-color-primary: #e64a19; 25 | --ifm-color-primary-dark: #b73a12; 26 | --ifm-color-primary-darker: #8c2c0e; 27 | --ifm-color-primary-darkest: #5a1e0a; 28 | --ifm-color-primary-light: #eb6d45; 29 | --ifm-color-primary-lighter: #f09178; 30 | --ifm-color-primary-lightest: #f5b5a6; 31 | --ifm-code-font-size: 95%; 32 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.25); 33 | --code-block-diff-add-line-color: #2d5a2c; 34 | --code-block-diff-remove-line-color: #5a2d2c; 35 | } 36 | 37 | .code-block-diff-add-line { 38 | background-color: var(--code-block-diff-add-line-color); 39 | display: block; 40 | margin: 0 -40px; 41 | padding: 0 40px; 42 | } 43 | 44 | .code-block-diff-add-line::before { 45 | position: absolute; 46 | left: 8px; 47 | padding-right: 8px; 48 | content: "+"; 49 | } 50 | 51 | .code-block-diff-remove-line { 52 | background-color: var(--code-block-diff-remove-line-color); 53 | display: block; 54 | margin: 0 -40px; 55 | padding: 0 40px; 56 | } 57 | 58 | .code-block-diff-remove-line::before { 59 | position: absolute; 60 | left: 8px; 61 | padding-right: 8px; 62 | content: "-"; 63 | } 64 | 65 | /** 66 | * use magic comments to mark diff blocks 67 | */ 68 | pre code:has(.code-block-diff-add-line) { 69 | padding-left: 40px !important; 70 | } 71 | 72 | pre code:has(.code-block-diff-remove-line) { 73 | padding-left: 40px !important; 74 | } 
75 | -------------------------------------------------------------------------------- /docs/src/pages/index.module.css: -------------------------------------------------------------------------------- 1 | /** 2 | * CSS files with the .module.css suffix will be treated as CSS modules 3 | * and scoped locally. 4 | */ 5 | 6 | .heroBanner { 7 | padding: 4rem 0; 8 | text-align: center; 9 | position: relative; 10 | overflow: hidden; 11 | } 12 | 13 | @media screen and (max-width: 996px) { 14 | .heroBanner { 15 | padding: 2rem; 16 | } 17 | } 18 | 19 | .buttons { 20 | display: flex; 21 | align-items: center; 22 | justify-content: center; 23 | } 24 | -------------------------------------------------------------------------------- /docs/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import type { ReactNode } from "react"; 2 | import clsx from "clsx"; 3 | import Link from "@docusaurus/Link"; 4 | import useDocusaurusContext from "@docusaurus/useDocusaurusContext"; 5 | import Layout from "@theme/Layout"; 6 | import HomepageFeatures from "@site/src/components/HomepageFeatures"; 7 | import Heading from "@theme/Heading"; 8 | 9 | import styles from "./index.module.css"; 10 | 11 | function HomepageHeader() { 12 | const { siteConfig } = useDocusaurusContext(); 13 | return ( 14 |
15 |
16 | 17 | {siteConfig.title} 18 | 19 |

{siteConfig.tagline}

20 |
21 | 22 | Get started 23 | 24 |
25 |
26 |
27 | ); 28 | } 29 | 30 | export default function Home(): ReactNode { 31 | const { siteConfig } = useDocusaurusContext(); 32 | return ( 33 | 37 | 38 |
39 | 40 | 41 |
42 |

43 | This is all placeholder text. It will be fixed. Give me time. I am 44 | one person and my project has unexpectedly gone viral. 45 |

46 |
47 |
48 |
49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /docs/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/.nojekyll -------------------------------------------------------------------------------- /docs/static/img/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/img/android-chrome-512x512.png -------------------------------------------------------------------------------- /docs/static/img/docusaurus-social-card.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/img/docusaurus-social-card.jpg -------------------------------------------------------------------------------- /docs/static/img/docusaurus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/img/docusaurus.png -------------------------------------------------------------------------------- /docs/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/img/favicon.ico -------------------------------------------------------------------------------- /docs/static/img/favicon.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/img/favicon.webp 
-------------------------------------------------------------------------------- /docs/static/img/happy.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/docs/static/img/happy.webp -------------------------------------------------------------------------------- /docs/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | // This file is not used in compilation. It is here just for a nice editor experience. 3 | "extends": "@docusaurus/tsconfig", 4 | "compilerOptions": { 5 | "baseUrl": "." 6 | }, 7 | "exclude": [".docusaurus", "build"] 8 | } 9 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/TecharoHQ/anubis 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/a-h/templ v0.3.857 7 | github.com/facebookgo/flagenv v0.0.0-20160425205200-fcd59fca7456 8 | github.com/golang-jwt/jwt/v5 v5.2.2 9 | github.com/playwright-community/playwright-go v0.5101.0 10 | github.com/prometheus/client_golang v1.22.0 11 | github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a 12 | github.com/yl2chen/cidranger v1.0.2 13 | golang.org/x/net v0.39.0 14 | ) 15 | 16 | require ( 17 | github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect 18 | github.com/a-h/parse v0.0.0-20250122154542-74294addb73e // indirect 19 | github.com/andybalholm/brotli v1.1.0 // indirect 20 | github.com/beorn7/perks v1.0.1 // indirect 21 | github.com/cenkalti/backoff/v4 v4.3.0 // indirect 22 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 23 | github.com/cli/browser v1.3.0 // indirect 24 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 25 | github.com/deckarep/golang-set/v2 v2.6.0 // indirect 26 | github.com/facebookgo/ensure 
v0.0.0-20160127193407-b4ab57deab51 // indirect 27 | github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect 28 | github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect 29 | github.com/fatih/color v1.16.0 // indirect 30 | github.com/fsnotify/fsnotify v1.7.0 // indirect 31 | github.com/go-jose/go-jose/v3 v3.0.4 // indirect 32 | github.com/go-stack/stack v1.8.1 // indirect 33 | github.com/mattn/go-colorable v0.1.13 // indirect 34 | github.com/mattn/go-isatty v0.0.20 // indirect 35 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 36 | github.com/natefinch/atomic v1.0.1 // indirect 37 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 38 | github.com/prometheus/client_model v0.6.1 // indirect 39 | github.com/prometheus/common v0.62.0 // indirect 40 | github.com/prometheus/procfs v0.15.1 // indirect 41 | golang.org/x/exp/typeparams v0.0.0-20231108232855-2478ac86f678 // indirect 42 | golang.org/x/mod v0.24.0 // indirect 43 | golang.org/x/sync v0.12.0 // indirect 44 | golang.org/x/sys v0.32.0 // indirect 45 | golang.org/x/tools v0.31.0 // indirect 46 | google.golang.org/protobuf v1.36.5 // indirect 47 | honnef.co/go/tools v0.6.1 // indirect 48 | k8s.io/apimachinery v0.32.3 // indirect 49 | sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect 50 | sigs.k8s.io/yaml v1.4.0 // indirect 51 | ) 52 | 53 | tool ( 54 | github.com/a-h/templ/cmd/templ 55 | golang.org/x/tools/cmd/stringer 56 | honnef.co/go/tools/cmd/staticcheck 57 | ) 58 | -------------------------------------------------------------------------------- /internal/dnsbl/dnsbl.go: -------------------------------------------------------------------------------- 1 | package dnsbl 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net" 7 | "strings" 8 | ) 9 | 10 | //go:generate go tool golang.org/x/tools/cmd/stringer -type=DroneBLResponse 11 | 12 | type DroneBLResponse byte 13 | 14 | const ( 15 | AllGood DroneBLResponse = 
0 16 | IRCDrone DroneBLResponse = 3 17 | Bottler DroneBLResponse = 5 18 | UnknownSpambotOrDrone DroneBLResponse = 6 19 | DDOSDrone DroneBLResponse = 7 20 | SOCKSProxy DroneBLResponse = 8 21 | HTTPProxy DroneBLResponse = 9 22 | ProxyChain DroneBLResponse = 10 23 | OpenProxy DroneBLResponse = 11 24 | OpenDNSResolver DroneBLResponse = 12 25 | BruteForceAttackers DroneBLResponse = 13 26 | OpenWingateProxy DroneBLResponse = 14 27 | CompromisedRouter DroneBLResponse = 15 28 | AutoRootingWorms DroneBLResponse = 16 29 | AutoDetectedBotIP DroneBLResponse = 17 30 | Unknown DroneBLResponse = 255 31 | ) 32 | 33 | func Reverse(ip net.IP) string { 34 | if ip.To4() != nil { 35 | return reverse4(ip) 36 | } 37 | 38 | return reverse6(ip) 39 | } 40 | 41 | func reverse4(ip net.IP) string { 42 | splitAddress := strings.Split(ip.String(), ".") 43 | 44 | // swap first and last octet 45 | splitAddress[0], splitAddress[3] = splitAddress[3], splitAddress[0] 46 | // swap middle octets 47 | splitAddress[1], splitAddress[2] = splitAddress[2], splitAddress[1] 48 | 49 | return strings.Join(splitAddress, ".") 50 | } 51 | 52 | func reverse6(ip net.IP) string { 53 | ipBytes := []byte(ip) 54 | var sb strings.Builder 55 | 56 | for i := len(ipBytes) - 1; i >= 0; i-- { 57 | // Split the byte into two nibbles 58 | highNibble := ipBytes[i] >> 4 59 | lowNibble := ipBytes[i] & 0x0F 60 | 61 | // Append the nibbles in reversed order 62 | sb.WriteString(fmt.Sprintf("%x.%x.", lowNibble, highNibble)) 63 | } 64 | 65 | return sb.String()[:len(sb.String())-1] 66 | } 67 | 68 | func Lookup(ipStr string) (DroneBLResponse, error) { 69 | ip := net.ParseIP(ipStr) 70 | if ip == nil { 71 | return Unknown, errors.New("dnsbl: input is not an IP address") 72 | } 73 | 74 | revIP := Reverse(ip) + ".dnsbl.dronebl.org" 75 | 76 | ips, err := net.LookupIP(revIP) 77 | if err != nil { 78 | var dnserr *net.DNSError 79 | if errors.As(err, &dnserr) { 80 | if dnserr.IsNotFound { 81 | return AllGood, nil 82 | } 83 | } 84 | 85 | return 
Unknown, err 86 | } 87 | 88 | if len(ips) != 0 { 89 | for _, ip := range ips { 90 | return DroneBLResponse(ip.To4()[3]), nil 91 | } 92 | } 93 | 94 | return UnknownSpambotOrDrone, nil 95 | } 96 | -------------------------------------------------------------------------------- /internal/dnsbl/dnsbl_test.go: -------------------------------------------------------------------------------- 1 | package dnsbl 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "os" 7 | "testing" 8 | ) 9 | 10 | func TestReverse4(t *testing.T) { 11 | cases := []struct { 12 | inp, out string 13 | }{ 14 | {"1.2.3.4", "4.3.2.1"}, 15 | } 16 | 17 | for _, cs := range cases { 18 | t.Run(fmt.Sprintf("%s->%s", cs.inp, cs.out), func(t *testing.T) { 19 | out := reverse4(net.ParseIP(cs.inp)) 20 | 21 | if out != cs.out { 22 | t.Errorf("wanted %s\ngot: %s", cs.out, out) 23 | } 24 | }) 25 | } 26 | } 27 | 28 | func TestReverse6(t *testing.T) { 29 | cases := []struct { 30 | inp, out string 31 | }{ 32 | { 33 | inp: "1234:5678:9ABC:DEF0:1234:5678:9ABC:DEF0", 34 | out: "0.f.e.d.c.b.a.9.8.7.6.5.4.3.2.1.0.f.e.d.c.b.a.9.8.7.6.5.4.3.2.1", 35 | }, 36 | } 37 | 38 | for _, cs := range cases { 39 | t.Run(fmt.Sprintf("%s->%s", cs.inp, cs.out), func(t *testing.T) { 40 | out := reverse6(net.ParseIP(cs.inp)) 41 | 42 | if out != cs.out { 43 | t.Errorf("wanted %s, got: %s", cs.out, out) 44 | } 45 | }) 46 | } 47 | } 48 | 49 | func TestLookup(t *testing.T) { 50 | if os.Getenv("DONT_USE_NETWORK") != "" { 51 | t.Skip("test requires network egress") 52 | return 53 | } 54 | 55 | resp, err := Lookup("27.65.243.194") 56 | if err != nil { 57 | t.Fatalf("it broked: %v", err) 58 | } 59 | 60 | t.Logf("response: %d", resp) 61 | } 62 | -------------------------------------------------------------------------------- /internal/dnsbl/droneblresponse_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type=DroneBLResponse"; DO NOT EDIT. 
2 | 3 | package dnsbl 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 10 | var x [1]struct{} 11 | _ = x[AllGood-0] 12 | _ = x[IRCDrone-3] 13 | _ = x[Bottler-5] 14 | _ = x[UnknownSpambotOrDrone-6] 15 | _ = x[DDOSDrone-7] 16 | _ = x[SOCKSProxy-8] 17 | _ = x[HTTPProxy-9] 18 | _ = x[ProxyChain-10] 19 | _ = x[OpenProxy-11] 20 | _ = x[OpenDNSResolver-12] 21 | _ = x[BruteForceAttackers-13] 22 | _ = x[OpenWingateProxy-14] 23 | _ = x[CompromisedRouter-15] 24 | _ = x[AutoRootingWorms-16] 25 | _ = x[AutoDetectedBotIP-17] 26 | _ = x[Unknown-255] 27 | } 28 | 29 | const ( 30 | _DroneBLResponse_name_0 = "AllGood" 31 | _DroneBLResponse_name_1 = "IRCDrone" 32 | _DroneBLResponse_name_2 = "BottlerUnknownSpambotOrDroneDDOSDroneSOCKSProxyHTTPProxyProxyChainOpenProxyOpenDNSResolverBruteForceAttackersOpenWingateProxyCompromisedRouterAutoRootingWormsAutoDetectedBotIP" 33 | _DroneBLResponse_name_3 = "Unknown" 34 | ) 35 | 36 | var ( 37 | _DroneBLResponse_index_2 = [...]uint8{0, 7, 28, 37, 47, 56, 66, 75, 90, 109, 125, 142, 158, 175} 38 | ) 39 | 40 | func (i DroneBLResponse) String() string { 41 | switch { 42 | case i == 0: 43 | return _DroneBLResponse_name_0 44 | case i == 3: 45 | return _DroneBLResponse_name_1 46 | case 5 <= i && i <= 17: 47 | i -= 5 48 | return _DroneBLResponse_name_2[_DroneBLResponse_index_2[i]:_DroneBLResponse_index_2[i+1]] 49 | case i == 255: 50 | return _DroneBLResponse_name_3 51 | default: 52 | return "DroneBLResponse(" + strconv.FormatInt(int64(i), 10) + ")" 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /internal/hash.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | ) 7 | 8 | func SHA256sum(text string) string { 9 | hash := sha256.New() 10 | 
hash.Write([]byte(text)) 11 | return hex.EncodeToString(hash.Sum(nil)) 12 | } 13 | -------------------------------------------------------------------------------- /internal/headers.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "log/slog" 5 | "net" 6 | "net/http" 7 | "strings" 8 | 9 | "github.com/TecharoHQ/anubis" 10 | "github.com/sebest/xff" 11 | ) 12 | 13 | // UnchangingCache sets the Cache-Control header to cache a response for 1 year if 14 | // and only if the application is compiled in "release" mode by Docker. 15 | func UnchangingCache(next http.Handler) http.Handler { 16 | //goland:noinspection GoBoolExpressions 17 | if anubis.Version == "devel" { 18 | return next 19 | } 20 | 21 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 22 | w.Header().Set("Cache-Control", "public, max-age=31536000") 23 | next.ServeHTTP(w, r) 24 | }) 25 | } 26 | 27 | // RemoteXRealIP sets the X-Real-Ip header to the request's real IP if 28 | // the setting is enabled by the user. 29 | func RemoteXRealIP(useRemoteAddress bool, bindNetwork string, next http.Handler) http.Handler { 30 | if !useRemoteAddress { 31 | slog.Debug("skipping middleware, useRemoteAddress is empty") 32 | return next 33 | } 34 | 35 | if bindNetwork == "unix" { 36 | // For local sockets there is no real remote address but the localhost 37 | // address should be sensible. 
38 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 39 | r.Header.Set("X-Real-Ip", "127.0.0.1") 40 | next.ServeHTTP(w, r) 41 | }) 42 | } 43 | 44 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 45 | host, _, err := net.SplitHostPort(r.RemoteAddr) 46 | if err != nil { 47 | panic(err) // this should never happen 48 | } 49 | r.Header.Set("X-Real-Ip", host) 50 | next.ServeHTTP(w, r) 51 | }) 52 | } 53 | 54 | // XForwardedForToXRealIP sets the X-Real-Ip header based on the contents 55 | // of the X-Forwarded-For header. 56 | func XForwardedForToXRealIP(next http.Handler) http.Handler { 57 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 58 | if xffHeader := r.Header.Get("X-Forwarded-For"); r.Header.Get("X-Real-Ip") == "" && xffHeader != "" { 59 | ip := xff.Parse(xffHeader) 60 | slog.Debug("setting x-real-ip", "val", ip) 61 | r.Header.Set("X-Real-Ip", ip) 62 | } 63 | 64 | next.ServeHTTP(w, r) 65 | }) 66 | } 67 | 68 | // XForwardedForUpdate sets or updates the X-Forwarded-For header, adding 69 | // the known remote address to an existing chain if present 70 | func XForwardedForUpdate(next http.Handler) http.Handler { 71 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 72 | defer next.ServeHTTP(w, r) 73 | 74 | remoteIP, _, err := net.SplitHostPort(r.RemoteAddr) 75 | 76 | if parsedRemoteIP := net.ParseIP(remoteIP); parsedRemoteIP != nil && parsedRemoteIP.IsLoopback() { 77 | // anubis is likely deployed behind a local reverse proxy 78 | // pass header as-is to not break existing applications 79 | return 80 | } 81 | 82 | if err != nil { 83 | slog.Warn("The default format of request.RemoteAddr should be IP:Port", "remoteAddr", r.RemoteAddr) 84 | return 85 | } 86 | if xff := r.Header.Get("X-Forwarded-For"); xff != "" { 87 | forwardedList := strings.Split(",", xff) 88 | forwardedList = append(forwardedList, remoteIP) 89 | // this behavior is equivalent to 90 | // ingress-nginx 
"compute-full-forwarded-for" 91 | // https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#compute-full-forwarded-for 92 | // 93 | // this would be the correct place to strip and/or flatten this list 94 | // 95 | // strip - iterate backwards and eliminate configured trusted IPs 96 | // flatten - only return the last element to avoid spoofing confusion 97 | // 98 | // many applications handle this in different ways, but 99 | // generally they'd be expected to do these two things on 100 | // their own end to find the first non-spoofed IP 101 | r.Header.Set("X-Forwarded-For", strings.Join(forwardedList, ",")) 102 | } else { 103 | r.Header.Set("X-Forwarded-For", remoteIP) 104 | } 105 | }) 106 | } 107 | 108 | // NoStoreCache sets the Cache-Control header to no-store for the response. 109 | func NoStoreCache(next http.Handler) http.Handler { 110 | return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 111 | w.Header().Set("Cache-Control", "no-store") 112 | next.ServeHTTP(w, r) 113 | }) 114 | } 115 | 116 | // NoBrowsing prevents directory browsing by returning a 404 for any request that ends with a "/". 
func NoBrowsing(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Any path with a trailing slash is treated as a directory listing
		// attempt and rejected with 404.
		if strings.HasSuffix(r.URL.Path, "/") {
			http.NotFound(w, r)
			return
		}
		next.ServeHTTP(w, r)
	})
}
--------------------------------------------------------------------------------
/internal/ogtags/cache.go:
--------------------------------------------------------------------------------
package ogtags

import (
	"errors"
	"log/slog"
	"net/url"
	"syscall"
)

// GetOGTags is the main function that retrieves Open Graph tags for a URL.
// Cached tags are returned when present; otherwise the target page is
// fetched, parsed, and the extracted tags stored for c.ogTimeToLive.
// Returns (nil, nil) when the fetch failed in a way that was already
// handled (connection refused, or any error wrapped in ErrOgHandled).
func (c *OGTagCache) GetOGTags(url *url.URL) (map[string]string, error) {
	if url == nil {
		return nil, errors.New("nil URL provided, cannot fetch OG tags")
	}
	urlStr := c.getTarget(url)
	// Check cache first
	if cachedTags := c.checkCache(urlStr); cachedTags != nil {
		return cachedTags, nil
	}

	// Fetch HTML content
	doc, err := c.fetchHTMLDocument(urlStr)
	if errors.Is(err, syscall.ECONNREFUSED) {
		// NOTE(review): refused connections are not negatively cached here,
		// so every request re-dials a down backend — consider caching an
		// empty result like the timeout path does; confirm intent.
		slog.Debug("Connection refused, returning empty tags")
		return nil, nil
	} else if errors.Is(err, ErrOgHandled) {
		// Error was handled (and possibly cached) in fetchHTMLDocument,
		// return empty tags
		return nil, nil
	}
	if err != nil {
		return nil, err
	}

	// Extract OG tags
	ogTags := c.extractOGTags(doc)

	// Store in cache
	c.cache.Set(urlStr, ogTags, c.ogTimeToLive)

	return ogTags, nil
}

// checkCache checks if we have the tags cached and returns them if so.
// Returns nil on a cache miss (cached values are never nil, so nil is
// unambiguous).
func (c *OGTagCache) checkCache(urlStr string) map[string]string {
	if cachedTags, ok := c.cache.Get(urlStr); ok {
		slog.Debug("cache hit", "tags", cachedTags)
		return cachedTags
	}
	slog.Debug("cache miss", "url", urlStr)
	return nil
}
--------------------------------------------------------------------------------
/internal/ogtags/cache_test.go: -------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "net/url" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestCheckCache(t *testing.T) { 12 | cache := NewOGTagCache("http://example.com", true, time.Minute) 13 | 14 | // Set up test data 15 | urlStr := "http://example.com/page" 16 | expectedTags := map[string]string{ 17 | "og:title": "Test Title", 18 | "og:description": "Test Description", 19 | } 20 | 21 | // Test cache miss 22 | tags := cache.checkCache(urlStr) 23 | if tags != nil { 24 | t.Errorf("expected nil tags on cache miss, got %v", tags) 25 | } 26 | 27 | // Manually add to cache 28 | cache.cache.Set(urlStr, expectedTags, time.Minute) 29 | 30 | // Test cache hit 31 | tags = cache.checkCache(urlStr) 32 | if tags == nil { 33 | t.Fatal("expected non-nil tags on cache hit, got nil") 34 | } 35 | 36 | for key, expectedValue := range expectedTags { 37 | if value, ok := tags[key]; !ok || value != expectedValue { 38 | t.Errorf("expected %s: %s, got: %s", key, expectedValue, value) 39 | } 40 | } 41 | } 42 | 43 | func TestGetOGTags(t *testing.T) { 44 | var loadCount int // Counter to track how many times the test route is loaded 45 | 46 | // Create a test server to serve a sample HTML page with OG tags 47 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 48 | loadCount++ 49 | if loadCount > 1 { 50 | t.Fatalf("Test route loaded more than once, cache failed") 51 | } 52 | w.Header().Set("Content-Type", "text/html") 53 | w.Write([]byte(` 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |

Hello, world!

63 | 64 | 65 | `)) 66 | })) 67 | defer ts.Close() 68 | 69 | // Create an instance of OGTagCache with a short TTL for testing 70 | cache := NewOGTagCache(ts.URL, true, 1*time.Minute) 71 | 72 | // Parse the test server URL 73 | parsedURL, err := url.Parse(ts.URL) 74 | if err != nil { 75 | t.Fatalf("failed to parse test server URL: %v", err) 76 | } 77 | 78 | // Test fetching OG tags from the test server 79 | ogTags, err := cache.GetOGTags(parsedURL) 80 | if err != nil { 81 | t.Fatalf("failed to get OG tags: %v", err) 82 | } 83 | 84 | // Verify the fetched OG tags 85 | expectedTags := map[string]string{ 86 | "og:title": "Test Title", 87 | "og:description": "Test Description", 88 | "og:image": "http://example.com/image.jpg", 89 | } 90 | 91 | for key, expectedValue := range expectedTags { 92 | if value, ok := ogTags[key]; !ok || value != expectedValue { 93 | t.Errorf("expected %s: %s, got: %s", key, expectedValue, value) 94 | } 95 | } 96 | 97 | // Test fetching OG tags from the cache 98 | ogTags, err = cache.GetOGTags(parsedURL) 99 | if err != nil { 100 | t.Fatalf("failed to get OG tags from cache: %v", err) 101 | } 102 | 103 | // Test fetching OG tags from the cache (3rd time) 104 | newOgTags, err := cache.GetOGTags(parsedURL) 105 | if err != nil { 106 | t.Fatalf("failed to get OG tags from cache: %v", err) 107 | } 108 | 109 | // Verify the cached OG tags 110 | for key, expectedValue := range expectedTags { 111 | if value, ok := ogTags[key]; !ok || value != expectedValue { 112 | t.Errorf("expected %s: %s, got: %s", key, expectedValue, value) 113 | } 114 | 115 | initialValue := ogTags[key] 116 | cachedValue, ok := newOgTags[key] 117 | if !ok || initialValue != cachedValue { 118 | t.Errorf("Cache does not line up: expected %s: %s, got: %s", key, initialValue, cachedValue) 119 | } 120 | 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /internal/ogtags/fetch.go: 
-------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "golang.org/x/net/html" 7 | "io" 8 | "log/slog" 9 | "mime" 10 | "net" 11 | "net/http" 12 | ) 13 | 14 | var ( 15 | ErrOgHandled = errors.New("og: handled error") // used to indicate that the error was handled and should not be logged 16 | emptyMap = map[string]string{} // used to indicate an empty result in the cache. Can't use nil as it would be a cache miss. 17 | ) 18 | 19 | func (c *OGTagCache) fetchHTMLDocument(urlStr string) (*html.Node, error) { 20 | resp, err := c.client.Get(urlStr) 21 | if err != nil { 22 | var netErr net.Error 23 | if errors.As(err, &netErr) && netErr.Timeout() { 24 | slog.Debug("og: request timed out", "url", urlStr) 25 | c.cache.Set(urlStr, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server 26 | } 27 | return nil, fmt.Errorf("http get failed: %w", err) 28 | } 29 | // this defer will call MaxBytesReader's Close, which closes the original body. 
30 | defer func(Body io.ReadCloser) { 31 | err := Body.Close() 32 | if err != nil { 33 | slog.Debug("og: error closing response body", "url", urlStr, "error", err) 34 | } 35 | }(resp.Body) 36 | 37 | if resp.StatusCode != http.StatusOK { 38 | slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode) 39 | c.cache.Set(urlStr, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes 40 | return nil, fmt.Errorf("%w: page not found", ErrOgHandled) 41 | } 42 | 43 | // Check content type 44 | ct := resp.Header.Get("Content-Type") 45 | if ct == "" { 46 | // assume non html body 47 | return nil, fmt.Errorf("missing Content-Type header") 48 | } else { 49 | mediaType, _, err := mime.ParseMediaType(ct) 50 | if err != nil { 51 | // Malformed Content-Type header 52 | slog.Debug("og: malformed Content-Type header", "url", urlStr, "contentType", ct) 53 | return nil, fmt.Errorf("%w malformed Content-Type header: %w", ErrOgHandled, err) 54 | } 55 | 56 | if mediaType != "text/html" && mediaType != "application/xhtml+xml" { 57 | slog.Debug("og: unsupported Content-Type", "url", urlStr, "contentType", mediaType) 58 | return nil, fmt.Errorf("%w unsupported Content-Type: %s", ErrOgHandled, mediaType) 59 | } 60 | } 61 | 62 | resp.Body = http.MaxBytesReader(nil, resp.Body, c.maxContentLength) 63 | 64 | doc, err := html.Parse(resp.Body) 65 | if err != nil { 66 | // Check if the error is specifically because the limit was exceeded 67 | var maxBytesErr *http.MaxBytesError 68 | if errors.As(err, &maxBytesErr) { 69 | slog.Debug("og: content exceeded max length", "url", urlStr, "limit", c.maxContentLength) 70 | return nil, fmt.Errorf("content too large: exceeded %d bytes", c.maxContentLength) 71 | } 72 | // parsing error (e.g., malformed HTML) 73 | return nil, fmt.Errorf("failed to parse HTML: %w", err) 74 | } 75 | 76 | return doc, nil 77 | } 78 | -------------------------------------------------------------------------------- 
/internal/ogtags/fetch_test.go: -------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "net/http" 7 | "net/http/httptest" 8 | "os" 9 | "strings" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | func TestFetchHTMLDocument(t *testing.T) { 15 | tests := []struct { 16 | name string 17 | htmlContent string 18 | contentType string 19 | statusCode int 20 | contentLength int64 21 | expectError bool 22 | }{ 23 | { 24 | name: "Valid HTML", 25 | htmlContent: ` 26 | 27 | Test 28 |

Test content

29 | `, 30 | contentType: "text/html", 31 | statusCode: http.StatusOK, 32 | expectError: false, 33 | }, 34 | { 35 | name: "Empty HTML", 36 | htmlContent: "", 37 | contentType: "text/html", 38 | statusCode: http.StatusOK, 39 | expectError: false, 40 | }, 41 | { 42 | name: "Not found error", 43 | htmlContent: "", 44 | contentType: "text/html", 45 | statusCode: http.StatusNotFound, 46 | expectError: true, 47 | }, 48 | { 49 | name: "Unsupported Content-Type", 50 | htmlContent: "*Insert rick roll here*", 51 | contentType: "video/mp4", 52 | statusCode: http.StatusOK, 53 | expectError: true, 54 | }, 55 | { 56 | name: "Too large content", 57 | contentType: "text/html", 58 | statusCode: http.StatusOK, 59 | expectError: true, 60 | contentLength: 5 * 1024 * 1024, // 5MB (over 2MB limit) 61 | }, 62 | } 63 | 64 | for _, tt := range tests { 65 | t.Run(tt.name, func(t *testing.T) { 66 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 67 | if tt.contentType != "" { 68 | w.Header().Set("Content-Type", tt.contentType) 69 | } 70 | if tt.contentLength > 0 { 71 | // Simulate content length but avoid sending too much actual data 72 | w.Header().Set("Content-Length", fmt.Sprintf("%d", tt.contentLength)) 73 | io.CopyN(w, strings.NewReader("X"), tt.contentLength) 74 | } else { 75 | w.WriteHeader(tt.statusCode) 76 | w.Write([]byte(tt.htmlContent)) 77 | } 78 | })) 79 | defer ts.Close() 80 | 81 | cache := NewOGTagCache("", true, time.Minute) 82 | doc, err := cache.fetchHTMLDocument(ts.URL) 83 | 84 | if tt.expectError { 85 | if err == nil { 86 | t.Error("expected error, got nil") 87 | } 88 | if doc != nil { 89 | t.Error("expected nil document on error, got non-nil") 90 | } 91 | } else { 92 | if err != nil { 93 | t.Errorf("unexpected error: %v", err) 94 | } 95 | if doc == nil { 96 | t.Error("expected non-nil document, got nil") 97 | } 98 | } 99 | }) 100 | } 101 | } 102 | 103 | func TestFetchHTMLDocumentInvalidURL(t *testing.T) { 104 | if 
os.Getenv("DONT_USE_NETWORK") != "" { 105 | t.Skip("test requires theoretical network egress") 106 | } 107 | 108 | cache := NewOGTagCache("", true, time.Minute) 109 | 110 | doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example") 111 | 112 | if err == nil { 113 | t.Error("expected error for invalid URL, got nil") 114 | } 115 | 116 | if doc != nil { 117 | t.Error("expected nil document for invalid URL, got non-nil") 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /internal/ogtags/integration_test.go: -------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "net/http" 5 | "net/http/httptest" 6 | "net/url" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestIntegrationGetOGTags(t *testing.T) { 12 | ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 13 | w.Header().Set("Content-Type", "text/html") 14 | 15 | switch r.URL.Path { 16 | case "/simple": 17 | w.Write([]byte(` 18 | 19 | 20 | 21 | 22 | 23 | 24 |

Simple page content

25 | 26 | `)) 27 | case "/complete": 28 | w.Write([]byte(` 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 |

Complete page content

39 | 40 | `)) 41 | case "/no-og": 42 | w.Write([]byte(` 43 | 44 | 45 | 46 | No OG Tags 47 | 48 |

No OG tags here

49 | 50 | `)) 51 | default: 52 | w.WriteHeader(http.StatusNotFound) 53 | } 54 | })) 55 | defer ts.Close() 56 | 57 | // Test with different configurations 58 | testCases := []struct { 59 | name string 60 | path string 61 | query string 62 | expectedTags map[string]string 63 | expectError bool 64 | }{ 65 | { 66 | name: "Simple page", 67 | path: "/simple", 68 | query: "", 69 | expectedTags: map[string]string{ 70 | "og:title": "Simple Page", 71 | "og:type": "website", 72 | }, 73 | expectError: false, 74 | }, 75 | { 76 | name: "Complete page", 77 | path: "/complete", 78 | query: "ref=test", 79 | expectedTags: map[string]string{ 80 | "og:title": "Complete Page", 81 | "og:description": "A page with many OG tags", 82 | "og:image": "http://example.com/image.jpg", 83 | "og:url": "http://example.com/complete", 84 | "og:type": "article", 85 | }, 86 | expectError: false, 87 | }, 88 | { 89 | name: "Page with no OG tags", 90 | path: "/no-og", 91 | query: "", 92 | expectedTags: map[string]string{}, 93 | expectError: false, 94 | }, 95 | { 96 | name: "Non-existent page", 97 | path: "/not-found", 98 | query: "", 99 | expectedTags: nil, 100 | expectError: false, 101 | }, 102 | } 103 | 104 | for _, tc := range testCases { 105 | t.Run(tc.name, func(t *testing.T) { 106 | // Create cache instance 107 | cache := NewOGTagCache(ts.URL, true, 1*time.Minute) 108 | 109 | // Create URL for test 110 | testURL, _ := url.Parse(ts.URL) 111 | testURL.Path = tc.path 112 | testURL.RawQuery = tc.query 113 | 114 | // Get OG tags 115 | ogTags, err := cache.GetOGTags(testURL) 116 | 117 | // Check error expectation 118 | if tc.expectError { 119 | if err == nil { 120 | t.Error("expected error, got nil") 121 | } 122 | return 123 | } 124 | 125 | if err != nil { 126 | t.Fatalf("unexpected error: %v", err) 127 | } 128 | 129 | // Verify all expected tags are present 130 | for key, expectedValue := range tc.expectedTags { 131 | if value, ok := ogTags[key]; !ok || value != expectedValue { 132 | t.Errorf("expected 
%s: %s, got: %s", key, expectedValue, value) 133 | } 134 | } 135 | 136 | // Verify no extra tags are present 137 | if len(ogTags) != len(tc.expectedTags) { 138 | t.Errorf("expected %d tags, got %d", len(tc.expectedTags), len(ogTags)) 139 | } 140 | 141 | // Test cache retrieval 142 | cachedOGTags, err := cache.GetOGTags(testURL) 143 | if err != nil { 144 | t.Fatalf("failed to get OG tags from cache: %v", err) 145 | } 146 | 147 | // Verify cached tags match 148 | for key, expectedValue := range tc.expectedTags { 149 | if value, ok := cachedOGTags[key]; !ok || value != expectedValue { 150 | t.Errorf("cached value - expected %s: %s, got: %s", key, expectedValue, value) 151 | } 152 | } 153 | }) 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /internal/ogtags/ogtags.go: -------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "net/http" 5 | "net/url" 6 | "time" 7 | 8 | "github.com/TecharoHQ/anubis/decaymap" 9 | ) 10 | 11 | type OGTagCache struct { 12 | cache *decaymap.Impl[string, map[string]string] 13 | target string 14 | ogPassthrough bool 15 | ogTimeToLive time.Duration 16 | approvedTags []string 17 | approvedPrefixes []string 18 | client *http.Client 19 | maxContentLength int64 20 | } 21 | 22 | func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration) *OGTagCache { 23 | // Predefined approved tags and prefixes 24 | // In the future, these could come from configuration 25 | defaultApprovedTags := []string{"description", "keywords", "author"} 26 | defaultApprovedPrefixes := []string{"og:", "twitter:", "fediverse:"} 27 | client := &http.Client{ 28 | Timeout: 5 * time.Second, /*make this configurable?*/ 29 | } 30 | 31 | const maxContentLength = 16 << 20 // 16 MiB in bytes 32 | 33 | return &OGTagCache{ 34 | cache: decaymap.New[string, map[string]string](), 35 | target: target, 36 | ogPassthrough: ogPassthrough, 37 | ogTimeToLive: 
ogTimeToLive, 38 | approvedTags: defaultApprovedTags, 39 | approvedPrefixes: defaultApprovedPrefixes, 40 | client: client, 41 | maxContentLength: maxContentLength, 42 | } 43 | } 44 | 45 | func (c *OGTagCache) getTarget(u *url.URL) string { 46 | return c.target + u.Path 47 | } 48 | 49 | func (c *OGTagCache) Cleanup() { 50 | c.cache.Cleanup() 51 | } 52 | -------------------------------------------------------------------------------- /internal/ogtags/ogtags_test.go: -------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "net/url" 5 | "testing" 6 | "time" 7 | ) 8 | 9 | func TestNewOGTagCache(t *testing.T) { 10 | tests := []struct { 11 | name string 12 | target string 13 | ogPassthrough bool 14 | ogTimeToLive time.Duration 15 | }{ 16 | { 17 | name: "Basic initialization", 18 | target: "http://example.com", 19 | ogPassthrough: true, 20 | ogTimeToLive: 5 * time.Minute, 21 | }, 22 | { 23 | name: "Empty target", 24 | target: "", 25 | ogPassthrough: false, 26 | ogTimeToLive: 10 * time.Minute, 27 | }, 28 | } 29 | 30 | for _, tt := range tests { 31 | t.Run(tt.name, func(t *testing.T) { 32 | cache := NewOGTagCache(tt.target, tt.ogPassthrough, tt.ogTimeToLive) 33 | 34 | if cache == nil { 35 | t.Fatal("expected non-nil cache, got nil") 36 | } 37 | 38 | if cache.target != tt.target { 39 | t.Errorf("expected target %s, got %s", tt.target, cache.target) 40 | } 41 | 42 | if cache.ogPassthrough != tt.ogPassthrough { 43 | t.Errorf("expected ogPassthrough %v, got %v", tt.ogPassthrough, cache.ogPassthrough) 44 | } 45 | 46 | if cache.ogTimeToLive != tt.ogTimeToLive { 47 | t.Errorf("expected ogTimeToLive %v, got %v", tt.ogTimeToLive, cache.ogTimeToLive) 48 | } 49 | }) 50 | } 51 | } 52 | 53 | func TestGetTarget(t *testing.T) { 54 | tests := []struct { 55 | name string 56 | target string 57 | path string 58 | query string 59 | expected string 60 | }{ 61 | { 62 | name: "No path or query", 63 | target: "http://example.com", 64 
| path: "", 65 | query: "", 66 | expected: "http://example.com", 67 | }, 68 | { 69 | name: "With complex path", 70 | target: "http://example.com", 71 | path: "/pag(#*((#@)ΓΓΓΓe/Γ", 72 | query: "id=123", 73 | expected: "http://example.com/pag(#*((#@)ΓΓΓΓe/Γ", 74 | }, 75 | { 76 | name: "With query and path", 77 | target: "http://example.com", 78 | path: "/page", 79 | query: "id=123", 80 | expected: "http://example.com/page", 81 | }, 82 | } 83 | 84 | for _, tt := range tests { 85 | t.Run(tt.name, func(t *testing.T) { 86 | cache := NewOGTagCache(tt.target, false, time.Minute) 87 | 88 | u := &url.URL{ 89 | Path: tt.path, 90 | RawQuery: tt.query, 91 | } 92 | 93 | result := cache.getTarget(u) 94 | 95 | if result != tt.expected { 96 | t.Errorf("expected %s, got %s", tt.expected, result) 97 | } 98 | }) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /internal/ogtags/parse.go: -------------------------------------------------------------------------------- 1 | package ogtags 2 | 3 | import ( 4 | "strings" 5 | 6 | "golang.org/x/net/html" 7 | ) 8 | 9 | // extractOGTags traverses the HTML document and extracts approved Open Graph tags 10 | func (c *OGTagCache) extractOGTags(doc *html.Node) map[string]string { 11 | ogTags := make(map[string]string) 12 | 13 | var traverseNodes func(*html.Node) 14 | traverseNodes = func(n *html.Node) { 15 | // isOGMetaTag only checks if it's a tag. 16 | // The actual filtering happens in extractMetaTagInfo now. 
17 | if isOGMetaTag(n) { 18 | property, content := c.extractMetaTagInfo(n) 19 | if property != "" { 20 | ogTags[property] = content 21 | } 22 | } 23 | 24 | for child := n.FirstChild; child != nil; child = child.NextSibling { 25 | traverseNodes(child) 26 | } 27 | } 28 | 29 | traverseNodes(doc) 30 | return ogTags 31 | } 32 | 33 | // isOGMetaTag checks if a node is *any* meta tag 34 | func isOGMetaTag(n *html.Node) bool { 35 | if n == nil { 36 | return false 37 | } 38 | return n.Type == html.ElementNode && n.Data == "meta" 39 | } 40 | 41 | // extractMetaTagInfo extracts property and content from a meta tag 42 | // *and* checks if the property is approved. 43 | // Returns empty property string if the tag is not approved. 44 | func (c *OGTagCache) extractMetaTagInfo(n *html.Node) (property, content string) { 45 | var rawProperty string // Store the property found before approval check 46 | 47 | for _, attr := range n.Attr { 48 | if attr.Key == "property" || attr.Key == "name" { 49 | rawProperty = attr.Val 50 | } 51 | if attr.Key == "content" { 52 | content = attr.Val 53 | } 54 | } 55 | 56 | // Check if the rawProperty is approved 57 | isApproved := false 58 | for _, prefix := range c.approvedPrefixes { 59 | if strings.HasPrefix(rawProperty, prefix) { 60 | isApproved = true 61 | break 62 | } 63 | } 64 | // Check exact approved tags if not already approved by prefix 65 | if !isApproved { 66 | for _, tag := range c.approvedTags { 67 | if rawProperty == tag { 68 | isApproved = true 69 | break 70 | } 71 | } 72 | } 73 | 74 | // Only return the property if it's approved 75 | if isApproved { 76 | property = rawProperty 77 | } 78 | 79 | // Content is returned regardless, but property will be "" if not approved 80 | return property, content 81 | } 82 | -------------------------------------------------------------------------------- /internal/slog.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 
| "log/slog" 6 | "os" 7 | ) 8 | 9 | func InitSlog(level string) { 10 | var programLevel slog.Level 11 | if err := (&programLevel).UnmarshalText([]byte(level)); err != nil { 12 | fmt.Fprintf(os.Stderr, "invalid log level %s: %v, using info\n", level, err) 13 | programLevel = slog.LevelInfo 14 | } 15 | 16 | leveler := &slog.LevelVar{} 17 | leveler.Set(programLevel) 18 | 19 | h := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{ 20 | AddSource: true, 21 | Level: leveler, 22 | }) 23 | slog.SetDefault(slog.New(h)) 24 | } 25 | -------------------------------------------------------------------------------- /internal/test/var/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.txt 3 | *.html -------------------------------------------------------------------------------- /lib/http.go: -------------------------------------------------------------------------------- 1 | package lib 2 | 3 | import ( 4 | "net/http" 5 | "time" 6 | 7 | "github.com/TecharoHQ/anubis" 8 | ) 9 | 10 | func (s *Server) ClearCookie(w http.ResponseWriter) { 11 | http.SetCookie(w, &http.Cookie{ 12 | Name: anubis.CookieName, 13 | Value: "", 14 | Expires: time.Now().Add(-1 * time.Hour), 15 | MaxAge: -1, 16 | SameSite: http.SameSiteLaxMode, 17 | Domain: s.opts.CookieDomain, 18 | }) 19 | } 20 | 21 | // https://github.com/oauth2-proxy/oauth2-proxy/blob/master/pkg/upstream/http.go#L124 22 | type UnixRoundTripper struct { 23 | Transport *http.Transport 24 | } 25 | 26 | // set bare minimum stuff 27 | func (t UnixRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { 28 | req = req.Clone(req.Context()) 29 | if req.Host == "" { 30 | req.Host = "localhost" 31 | } 32 | req.URL.Host = req.Host // proxy error: no Host in request URL 33 | req.URL.Scheme = "http" // make http.Transport happy and avoid an infinite recursion 34 | return t.Transport.RoundTrip(req) 35 | } 36 | 
-------------------------------------------------------------------------------- /lib/policy/bot.go: -------------------------------------------------------------------------------- 1 | package policy 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/TecharoHQ/anubis/internal" 7 | "github.com/TecharoHQ/anubis/lib/policy/config" 8 | ) 9 | 10 | type Bot struct { 11 | Name string 12 | Action config.Rule 13 | Challenge *config.ChallengeRules 14 | Rules Checker 15 | } 16 | 17 | func (b Bot) Hash() string { 18 | return internal.SHA256sum(fmt.Sprintf("%s::%s", b.Name, b.Rules.Hash())) 19 | } 20 | -------------------------------------------------------------------------------- /lib/policy/checker.go: -------------------------------------------------------------------------------- 1 | package policy 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "net" 7 | "net/http" 8 | "regexp" 9 | "strings" 10 | 11 | "github.com/TecharoHQ/anubis/internal" 12 | "github.com/yl2chen/cidranger" 13 | ) 14 | 15 | var ( 16 | ErrMisconfiguration = errors.New("[unexpected] policy: administrator misconfiguration") 17 | ) 18 | 19 | type Checker interface { 20 | Check(*http.Request) (bool, error) 21 | Hash() string 22 | } 23 | 24 | type CheckerList []Checker 25 | 26 | func (cl CheckerList) Check(r *http.Request) (bool, error) { 27 | for _, c := range cl { 28 | ok, err := c.Check(r) 29 | if err != nil { 30 | return ok, err 31 | } 32 | if ok { 33 | return ok, nil 34 | } 35 | } 36 | 37 | return false, nil 38 | } 39 | 40 | func (cl CheckerList) Hash() string { 41 | var sb strings.Builder 42 | 43 | for _, c := range cl { 44 | fmt.Fprintln(&sb, c.Hash()) 45 | } 46 | 47 | return internal.SHA256sum(sb.String()) 48 | } 49 | 50 | type RemoteAddrChecker struct { 51 | ranger cidranger.Ranger 52 | hash string 53 | } 54 | 55 | func NewRemoteAddrChecker(cidrs []string) (Checker, error) { 56 | ranger := cidranger.NewPCTrieRanger() 57 | var sb strings.Builder 58 | 59 | for _, cidr := range cidrs { 60 | _, rng, err := 
net.ParseCIDR(cidr) 61 | if err != nil { 62 | return nil, fmt.Errorf("%w: range %s not parsing: %w", ErrMisconfiguration, cidr, err) 63 | } 64 | 65 | ranger.Insert(cidranger.NewBasicRangerEntry(*rng)) 66 | fmt.Fprintln(&sb, cidr) 67 | } 68 | 69 | return &RemoteAddrChecker{ 70 | ranger: ranger, 71 | hash: internal.SHA256sum(sb.String()), 72 | }, nil 73 | } 74 | 75 | func (rac *RemoteAddrChecker) Check(r *http.Request) (bool, error) { 76 | host := r.Header.Get("X-Real-Ip") 77 | if host == "" { 78 | return false, fmt.Errorf("%w: header X-Real-Ip is not set", ErrMisconfiguration) 79 | } 80 | 81 | addr := net.ParseIP(host) 82 | if addr == nil { 83 | return false, fmt.Errorf("%w: %s is not an IP address", ErrMisconfiguration, host) 84 | } 85 | 86 | ok, err := rac.ranger.Contains(addr) 87 | if err != nil { 88 | return false, err 89 | } 90 | 91 | if ok { 92 | return true, nil 93 | } 94 | 95 | return false, nil 96 | } 97 | 98 | func (rac *RemoteAddrChecker) Hash() string { 99 | return rac.hash 100 | } 101 | 102 | type HeaderMatchesChecker struct { 103 | header string 104 | regexp *regexp.Regexp 105 | hash string 106 | } 107 | 108 | func NewUserAgentChecker(rexStr string) (Checker, error) { 109 | return NewHeaderMatchesChecker("User-Agent", rexStr) 110 | } 111 | 112 | func NewHeaderMatchesChecker(header, rexStr string) (Checker, error) { 113 | rex, err := regexp.Compile(rexStr) 114 | if err != nil { 115 | return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err) 116 | } 117 | return &HeaderMatchesChecker{header, rex, internal.SHA256sum(header + ": " + rexStr)}, nil 118 | } 119 | 120 | func (hmc *HeaderMatchesChecker) Check(r *http.Request) (bool, error) { 121 | if hmc.regexp.MatchString(r.Header.Get(hmc.header)) { 122 | return true, nil 123 | } 124 | 125 | return false, nil 126 | } 127 | 128 | func (hmc *HeaderMatchesChecker) Hash() string { 129 | return hmc.hash 130 | } 131 | 132 | type PathChecker struct { 133 | regexp *regexp.Regexp 134 | 
hash string 135 | } 136 | 137 | func NewPathChecker(rexStr string) (Checker, error) { 138 | rex, err := regexp.Compile(rexStr) 139 | if err != nil { 140 | return nil, fmt.Errorf("%w: regex %s failed parse: %w", ErrMisconfiguration, rexStr, err) 141 | } 142 | return &PathChecker{rex, internal.SHA256sum(rexStr)}, nil 143 | } 144 | 145 | func (pc *PathChecker) Check(r *http.Request) (bool, error) { 146 | if pc.regexp.MatchString(r.URL.Path) { 147 | return true, nil 148 | } 149 | 150 | return false, nil 151 | } 152 | 153 | func (pc *PathChecker) Hash() string { 154 | return pc.hash 155 | } 156 | 157 | func NewHeaderExistsChecker(key string) Checker { 158 | return headerExistsChecker{key} 159 | } 160 | 161 | type headerExistsChecker struct { 162 | header string 163 | } 164 | 165 | func (hec headerExistsChecker) Check(r *http.Request) (bool, error) { 166 | if r.Header.Get(hec.header) != "" { 167 | return true, nil 168 | } 169 | 170 | return false, nil 171 | } 172 | 173 | func (hec headerExistsChecker) Hash() string { 174 | return internal.SHA256sum(hec.header) 175 | } 176 | 177 | func NewHeadersChecker(headermap map[string]string) (Checker, error) { 178 | var result CheckerList 179 | var errs []error 180 | 181 | for key, rexStr := range headermap { 182 | if rexStr == ".*" { 183 | result = append(result, headerExistsChecker{key}) 184 | continue 185 | } 186 | 187 | rex, err := regexp.Compile(rexStr) 188 | if err != nil { 189 | errs = append(errs, fmt.Errorf("while compiling header %s regex %s: %w", key, rexStr, err)) 190 | continue 191 | } 192 | 193 | result = append(result, &HeaderMatchesChecker{key, rex, internal.SHA256sum(key + ": " + rexStr)}) 194 | } 195 | 196 | if len(errs) != 0 { 197 | return nil, errors.Join(errs...) 
198 | } 199 | 200 | return result, nil 201 | } 202 | -------------------------------------------------------------------------------- /lib/policy/checker_test.go: -------------------------------------------------------------------------------- 1 | package policy 2 | 3 | import ( 4 | "errors" 5 | "net/http" 6 | "testing" 7 | ) 8 | 9 | func TestRemoteAddrChecker(t *testing.T) { 10 | for _, tt := range []struct { 11 | name string 12 | cidrs []string 13 | ip string 14 | ok bool 15 | err error 16 | }{ 17 | { 18 | name: "match_ipv4", 19 | cidrs: []string{"0.0.0.0/0"}, 20 | ip: "1.1.1.1", 21 | ok: true, 22 | err: nil, 23 | }, 24 | { 25 | name: "match_ipv6", 26 | cidrs: []string{"::/0"}, 27 | ip: "cafe:babe::", 28 | ok: true, 29 | err: nil, 30 | }, 31 | { 32 | name: "not_match_ipv4", 33 | cidrs: []string{"1.1.1.1/32"}, 34 | ip: "1.1.1.2", 35 | ok: false, 36 | err: nil, 37 | }, 38 | { 39 | name: "not_match_ipv6", 40 | cidrs: []string{"cafe:babe::/128"}, 41 | ip: "cafe:babe:4::/128", 42 | ok: false, 43 | err: nil, 44 | }, 45 | { 46 | name: "no_ip_set", 47 | cidrs: []string{"::/0"}, 48 | ok: false, 49 | err: ErrMisconfiguration, 50 | }, 51 | { 52 | name: "invalid_ip", 53 | cidrs: []string{"::/0"}, 54 | ip: "According to all natural laws of aviation", 55 | ok: false, 56 | err: ErrMisconfiguration, 57 | }, 58 | } { 59 | t.Run(tt.name, func(t *testing.T) { 60 | rac, err := NewRemoteAddrChecker(tt.cidrs) 61 | if err != nil && !errors.Is(err, tt.err) { 62 | t.Fatalf("creating RemoteAddrChecker failed: %v", err) 63 | } 64 | 65 | r, err := http.NewRequest(http.MethodGet, "/", nil) 66 | if err != nil { 67 | t.Fatalf("can't make request: %v", err) 68 | } 69 | 70 | if tt.ip != "" { 71 | r.Header.Add("X-Real-Ip", tt.ip) 72 | } 73 | 74 | ok, err := rac.Check(r) 75 | 76 | if tt.ok != ok { 77 | t.Errorf("ok: %v, wanted: %v", ok, tt.ok) 78 | } 79 | 80 | if err != nil && tt.err != nil && !errors.Is(err, tt.err) { 81 | t.Errorf("err: %v, wanted: %v", err, tt.err) 82 | } 83 | }) 84 | } 85 | } 
86 | 87 | func TestHeaderMatchesChecker(t *testing.T) { 88 | for _, tt := range []struct { 89 | name string 90 | header string 91 | rexStr string 92 | reqHeaderKey string 93 | reqHeaderValue string 94 | ok bool 95 | err error 96 | }{ 97 | { 98 | name: "match", 99 | header: "Cf-Worker", 100 | rexStr: ".*", 101 | reqHeaderKey: "Cf-Worker", 102 | reqHeaderValue: "true", 103 | ok: true, 104 | err: nil, 105 | }, 106 | { 107 | name: "not_match", 108 | header: "Cf-Worker", 109 | rexStr: "false", 110 | reqHeaderKey: "Cf-Worker", 111 | reqHeaderValue: "true", 112 | ok: false, 113 | err: nil, 114 | }, 115 | { 116 | name: "not_present", 117 | header: "Cf-Worker", 118 | rexStr: "foobar", 119 | reqHeaderKey: "Something-Else", 120 | reqHeaderValue: "true", 121 | ok: false, 122 | err: nil, 123 | }, 124 | { 125 | name: "invalid_regex", 126 | rexStr: "a(b", 127 | err: ErrMisconfiguration, 128 | }, 129 | } { 130 | t.Run(tt.name, func(t *testing.T) { 131 | hmc, err := NewHeaderMatchesChecker(tt.header, tt.rexStr) 132 | if err != nil && !errors.Is(err, tt.err) { 133 | t.Fatalf("creating HeaderMatchesChecker failed") 134 | } 135 | 136 | if tt.err != nil && hmc == nil { 137 | return 138 | } 139 | 140 | r, err := http.NewRequest(http.MethodGet, "/", nil) 141 | if err != nil { 142 | t.Fatalf("can't make request: %v", err) 143 | } 144 | 145 | r.Header.Set(tt.reqHeaderKey, tt.reqHeaderValue) 146 | 147 | ok, err := hmc.Check(r) 148 | 149 | if tt.ok != ok { 150 | t.Errorf("ok: %v, wanted: %v", ok, tt.ok) 151 | } 152 | 153 | if err != nil && tt.err != nil && !errors.Is(err, tt.err) { 154 | t.Errorf("err: %v, wanted: %v", err, tt.err) 155 | } 156 | }) 157 | } 158 | } 159 | 160 | func TestHeaderExistsChecker(t *testing.T) { 161 | for _, tt := range []struct { 162 | name string 163 | header string 164 | reqHeader string 165 | ok bool 166 | }{ 167 | { 168 | name: "match", 169 | header: "Authorization", 170 | reqHeader: "Authorization", 171 | ok: true, 172 | }, 173 | { 174 | name: "not_match", 175 
| header: "Authorization", 176 | reqHeader: "Authentication", 177 | }, 178 | } { 179 | t.Run(tt.name, func(t *testing.T) { 180 | hec := headerExistsChecker{tt.header} 181 | 182 | r, err := http.NewRequest(http.MethodGet, "/", nil) 183 | if err != nil { 184 | t.Fatalf("can't make request: %v", err) 185 | } 186 | 187 | r.Header.Set(tt.reqHeader, "hunter2") 188 | 189 | ok, err := hec.Check(r) 190 | 191 | if tt.ok != ok { 192 | t.Errorf("ok: %v, wanted: %v", ok, tt.ok) 193 | } 194 | 195 | if err != nil { 196 | t.Errorf("err: %v", err) 197 | } 198 | }) 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /lib/policy/checkresult.go: -------------------------------------------------------------------------------- 1 | package policy 2 | 3 | import ( 4 | "log/slog" 5 | 6 | "github.com/TecharoHQ/anubis/lib/policy/config" 7 | ) 8 | 9 | type CheckResult struct { 10 | Name string 11 | Rule config.Rule 12 | } 13 | 14 | func (cr CheckResult) LogValue() slog.Value { 15 | return slog.GroupValue( 16 | slog.String("name", cr.Name), 17 | slog.String("rule", string(cr.Rule))) 18 | } 19 | -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/badregexes.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "name": "path-bad", 5 | "path_regex": "a(b", 6 | "action": "DENY" 7 | }, 8 | { 9 | "name": "user-agent-bad", 10 | "user_agent_regex": "a(b", 11 | "action": "DENY" 12 | }, 13 | { 14 | "name": "headers-bad", 15 | "headers": { 16 | "Accept-Encoding": "a(b" 17 | }, 18 | "action": "DENY" 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/badregexes.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - name: path-bad 3 | path_regex: "a(b" 4 | action: DENY 5 | - name: user-agent-bad 6 | 
user_agent_regex: "a(b" 7 | action: DENY -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/import_and_bot.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "import": "(data)/bots/ai-robots-txt.yaml", 5 | "name": "generic-browser", 6 | "user_agent_regex": "Mozilla|Opera\n", 7 | "action": "CHALLENGE" 8 | } 9 | ] 10 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/import_and_bot.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - import: (data)/bots/ai-robots-txt.yaml 3 | name: generic-browser 4 | user_agent_regex: > 5 | Mozilla|Opera 6 | action: CHALLENGE -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/import_invalid_file.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "import": "(data)/does-not-exist-fake-file.yaml" 5 | } 6 | ] 7 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/import_invalid_file.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - import: (data)/does-not-exist-fake-file.yaml -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/invalid.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | {} 4 | ] 5 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/invalid.yaml: -------------------------------------------------------------------------------- 1 | bots: [] -------------------------------------------------------------------------------- 
/lib/policy/config/testdata/bad/nobots.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /lib/policy/config/testdata/bad/nobots.yaml: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/allow_everyone.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "name": "everyones-invited", 5 | "remote_addresses": [ 6 | "0.0.0.0/0", 7 | "::/0" 8 | ], 9 | "action": "ALLOW" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/allow_everyone.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - name: everyones-invited 3 | remote_addresses: 4 | - "0.0.0.0/0" 5 | - "::/0" 6 | action: ALLOW -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/block_cf_workers.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "name": "Cloudflare Workers", 5 | "headers_regex": { 6 | "CF-Worker": ".*" 7 | }, 8 | "action": "DENY" 9 | } 10 | ], 11 | "dnsbl": false 12 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/block_cf_workers.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - name: cloudflare-workers 3 | headers_regex: 4 | CF-Worker: .* 5 | action: DENY -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/challengemozilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | 
{ 4 | "name": "generic-browser", 5 | "user_agent_regex": "Mozilla", 6 | "action": "CHALLENGE" 7 | } 8 | ] 9 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/challengemozilla.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - name: generic-browser 3 | user_agent_regex: Mozilla 4 | action: CHALLENGE -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/everything_blocked.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "name": "everything", 5 | "user_agent_regex": ".*", 6 | "action": "DENY" 7 | } 8 | ], 9 | "dnsbl": false 10 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/everything_blocked.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - name: everything 3 | user_agent_regex: .* 4 | action: DENY 5 | -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/import_filesystem.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "import": "./testdata/hack-test.json" 5 | } 6 | ] 7 | } -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/import_filesystem.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - import: ./testdata/hack-test.yaml -------------------------------------------------------------------------------- /lib/policy/config/testdata/good/import_keep_internet_working.json: -------------------------------------------------------------------------------- 1 | { 2 | "bots": [ 3 | { 4 | "import": "(data)/common/keep-internet-working.yaml" 5 | } 6 | ] 7 | } 
-------------------------------------------------------------------------------- /lib/policy/config/testdata/good/import_keep_internet_working.yaml: -------------------------------------------------------------------------------- 1 | bots: 2 | - import: (data)/common/keep-internet-working.yaml -------------------------------------------------------------------------------- /lib/policy/config/testdata/hack-test.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "ipv6-ula", 4 | "action": "ALLOW", 5 | "remote_addresses": [ 6 | "fc00::/7" 7 | ] 8 | } 9 | ] -------------------------------------------------------------------------------- /lib/policy/config/testdata/hack-test.yaml: -------------------------------------------------------------------------------- 1 | - name: well-known 2 | path_regex: ^/.well-known/.*$ 3 | action: ALLOW -------------------------------------------------------------------------------- /lib/policy/policy.go: -------------------------------------------------------------------------------- 1 | package policy 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | 8 | "github.com/prometheus/client_golang/prometheus" 9 | "github.com/prometheus/client_golang/prometheus/promauto" 10 | 11 | "github.com/TecharoHQ/anubis/lib/policy/config" 12 | ) 13 | 14 | var ( 15 | Applications = promauto.NewCounterVec(prometheus.CounterOpts{ 16 | Name: "anubis_policy_results", 17 | Help: "The results of each policy rule", 18 | }, []string{"rule", "action"}) 19 | ) 20 | 21 | type ParsedConfig struct { 22 | orig *config.Config 23 | 24 | Bots []Bot 25 | DNSBL bool 26 | DefaultDifficulty int 27 | } 28 | 29 | func NewParsedConfig(orig *config.Config) *ParsedConfig { 30 | return &ParsedConfig{ 31 | orig: orig, 32 | } 33 | } 34 | 35 | func ParseConfig(fin io.Reader, fname string, defaultDifficulty int) (*ParsedConfig, error) { 36 | c, err := config.Load(fin, fname) 37 | if err != nil { 38 | return nil, err 39 | } 40 
| 41 | var validationErrs []error 42 | 43 | result := NewParsedConfig(c) 44 | result.DefaultDifficulty = defaultDifficulty 45 | 46 | for _, b := range c.Bots { 47 | if berr := b.Valid(); berr != nil { 48 | validationErrs = append(validationErrs, berr) 49 | continue 50 | } 51 | 52 | parsedBot := Bot{ 53 | Name: b.Name, 54 | Action: b.Action, 55 | } 56 | 57 | cl := CheckerList{} 58 | 59 | if len(b.RemoteAddr) > 0 { 60 | c, err := NewRemoteAddrChecker(b.RemoteAddr) 61 | if err != nil { 62 | validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s remote addr set: %w", b.Name, err)) 63 | } else { 64 | cl = append(cl, c) 65 | } 66 | } 67 | 68 | if b.UserAgentRegex != nil { 69 | c, err := NewUserAgentChecker(*b.UserAgentRegex) 70 | if err != nil { 71 | validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s user agent regex: %w", b.Name, err)) 72 | } else { 73 | cl = append(cl, c) 74 | } 75 | } 76 | 77 | if b.PathRegex != nil { 78 | c, err := NewPathChecker(*b.PathRegex) 79 | if err != nil { 80 | validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s path regex: %w", b.Name, err)) 81 | } else { 82 | cl = append(cl, c) 83 | } 84 | } 85 | 86 | if len(b.HeadersRegex) > 0 { 87 | c, err := NewHeadersChecker(b.HeadersRegex) 88 | if err != nil { 89 | validationErrs = append(validationErrs, fmt.Errorf("while processing rule %s headers regex map: %w", b.Name, err)) 90 | } else { 91 | cl = append(cl, c) 92 | } 93 | } 94 | 95 | if b.Challenge == nil { 96 | parsedBot.Challenge = &config.ChallengeRules{ 97 | Difficulty: defaultDifficulty, 98 | ReportAs: defaultDifficulty, 99 | Algorithm: config.AlgorithmFast, 100 | } 101 | } else { 102 | parsedBot.Challenge = b.Challenge 103 | if parsedBot.Challenge.Algorithm == config.AlgorithmUnknown { 104 | parsedBot.Challenge.Algorithm = config.AlgorithmFast 105 | } 106 | } 107 | 108 | parsedBot.Rules = cl 109 | 110 | result.Bots = append(result.Bots, parsedBot) 111 | } 112 | 113 | if 
len(validationErrs) > 0 { 114 | return nil, fmt.Errorf("errors validating policy config JSON %s: %w", fname, errors.Join(validationErrs...)) 115 | } 116 | 117 | result.DNSBL = c.DNSBL 118 | 119 | return result, nil 120 | } 121 | -------------------------------------------------------------------------------- /lib/policy/policy_test.go: -------------------------------------------------------------------------------- 1 | package policy 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/TecharoHQ/anubis" 9 | "github.com/TecharoHQ/anubis/data" 10 | ) 11 | 12 | func TestDefaultPolicyMustParse(t *testing.T) { 13 | fin, err := data.BotPolicies.Open("botPolicies.json") 14 | if err != nil { 15 | t.Fatal(err) 16 | } 17 | defer fin.Close() 18 | 19 | if _, err := ParseConfig(fin, "botPolicies.json", anubis.DefaultDifficulty); err != nil { 20 | t.Fatalf("can't parse config: %v", err) 21 | } 22 | } 23 | 24 | func TestGoodConfigs(t *testing.T) { 25 | finfos, err := os.ReadDir("config/testdata/good") 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | 30 | for _, st := range finfos { 31 | st := st 32 | t.Run(st.Name(), func(t *testing.T) { 33 | fin, err := os.Open(filepath.Join("config", "testdata", "good", st.Name())) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | defer fin.Close() 38 | 39 | if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err != nil { 40 | t.Fatal(err) 41 | } 42 | }) 43 | } 44 | } 45 | 46 | func TestBadConfigs(t *testing.T) { 47 | finfos, err := os.ReadDir("config/testdata/bad") 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | 52 | for _, st := range finfos { 53 | st := st 54 | t.Run(st.Name(), func(t *testing.T) { 55 | fin, err := os.Open(filepath.Join("config", "testdata", "bad", st.Name())) 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | defer fin.Close() 60 | 61 | if _, err := ParseConfig(fin, fin.Name(), anubis.DefaultDifficulty); err == nil { 62 | t.Fatal(err) 63 | } else { 64 | t.Log(err) 65 | } 66 
| }) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /lib/policy/testdata/hack-test.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "ipv6-ula", 4 | "action": "ALLOW", 5 | "remote_addresses": [ 6 | "fc00::/7" 7 | ] 8 | } 9 | ] -------------------------------------------------------------------------------- /lib/policy/testdata/hack-test.yaml: -------------------------------------------------------------------------------- 1 | - name: well-known 2 | path_regex: ^/.well-known/.*$ 3 | action: ALLOW -------------------------------------------------------------------------------- /lib/random.go: -------------------------------------------------------------------------------- 1 | package lib 2 | 3 | import ( 4 | "math/rand" 5 | ) 6 | 7 | func randomJitter() bool { 8 | return rand.Intn(100) > 10 9 | } 10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@techaro/anubis", 3 | "version": "1.0.0-see-VERSION-file", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "npm run assets && go test ./...", 8 | "test:integration": "npm run assets && go test -v ./internal/test", 9 | "assets": "go generate ./... 
&& ./web/build.sh && ./xess/build.sh", 10 | "build": "npm run assets && go build -o ./var/anubis ./cmd/anubis", 11 | "dev": "npm run assets && go run ./cmd/anubis --use-remote-address", 12 | "container": "npm run assets && go run ./cmd/containerbuild", 13 | "package": "yeet" 14 | }, 15 | "author": "", 16 | "license": "ISC", 17 | "devDependencies": { 18 | "cssnano": "^7.0.6", 19 | "cssnano-preset-advanced": "^7.0.6", 20 | "esbuild": "^0.25.2", 21 | "postcss-cli": "^11.0.1", 22 | "postcss-import": "^16.1.0", 23 | "postcss-import-url": "^7.2.0", 24 | "postcss-url": "^10.1.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /run/anubis.freebsd: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # PROVIDE: anubis 4 | # REQUIRE: NETWORKING 5 | # KEYWORD: shutdown 6 | 7 | . /etc/rc.subr 8 | 9 | name=anubis 10 | rcvar=anubis_enable 11 | 12 | load_rc_config ${name} 13 | 14 | : ${anubis_enable="NO"} 15 | : ${anubis_user="anubis"} 16 | : ${anubis_bin="/usr/local/bin/anubis"} 17 | : ${anubis_environment_file="/etc/anubis.env"} 18 | 19 | command=/usr/sbin/daemon 20 | procname=${anubis_bin} 21 | pidfile=/var/run/anubis.pid 22 | logfile=/var/log/anubis.log 23 | command_args="-c -f -p ${pidfile} -o ${logfile} ${procname}" 24 | start_precmd=anubis_precmd 25 | 26 | anubis_precmd () { 27 | export $(xargs < ${anubis_environment_file}) 28 | if [ ! 
-f ${logfile} ]; then 29 | install -o anubis /dev/null ${logfile} 30 | fi 31 | install -o anubis /dev/null ${pidfile} 32 | } 33 | 34 | run_rc_command "$1" 35 | -------------------------------------------------------------------------------- /run/anubis@.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="Anubis HTTP defense proxy (instance %i)" 3 | 4 | [Service] 5 | ExecStart=/usr/bin/anubis 6 | Restart=always 7 | RestartSec=30s 8 | EnvironmentFile=/etc/anubis/%i.env 9 | LimitNOFILE=infinity 10 | DynamicUser=yes 11 | CacheDirectory=anubis/%i 12 | CacheDirectoryMode=0755 13 | StateDirectory=anubis/%i 14 | StateDirectoryMode=0755 15 | ReadWritePaths=/run 16 | 17 | [Install] 18 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /run/default.env: -------------------------------------------------------------------------------- 1 | BIND=:8923 2 | DIFFICULTY=4 3 | METRICS_BIND=:9090 4 | SERVE_ROBOTS_TXT=0 5 | TARGET=http://localhost:3000 -------------------------------------------------------------------------------- /var/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /web/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | 5 | cd "$(dirname "$0")" 6 | 7 | LICENSE='/* 8 | @licstart The following is the entire license notice for the 9 | JavaScript code in this page. 
10 | 11 | Copyright (c) 2025 Xe Iaso 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a copy 14 | of this software and associated documentation files (the "Software"), to deal 15 | in the Software without restriction, including without limitation the rights 16 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | copies of the Software, and to permit persons to whom the Software is 18 | furnished to do so, subject to the following conditions: 19 | 20 | The above copyright notice and this permission notice shall be included in 21 | all copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | THE SOFTWARE. 30 | 31 | @licend The above is the entire license notice 32 | for the JavaScript code in this page. 
33 | */' 34 | 35 | esbuild js/main.mjs --sourcemap --bundle --minify --outfile=static/js/main.mjs "--banner:js=${LICENSE}" 36 | gzip -f -k static/js/main.mjs 37 | zstd -f -k --ultra -22 static/js/main.mjs 38 | brotli -fZk static/js/main.mjs 39 | 40 | esbuild js/bench.mjs --sourcemap --bundle --minify --outfile=static/js/bench.mjs -------------------------------------------------------------------------------- /web/embed.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import "embed" 4 | 5 | //go:generate go tool github.com/a-h/templ/cmd/templ generate 6 | 7 | var ( 8 | //go:embed static 9 | Static embed.FS 10 | ) 11 | -------------------------------------------------------------------------------- /web/index.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "github.com/a-h/templ" 5 | 6 | "github.com/TecharoHQ/anubis/lib/policy/config" 7 | ) 8 | 9 | func Base(title string, body templ.Component) templ.Component { 10 | return base(title, body, nil, nil) 11 | } 12 | 13 | func BaseWithChallengeAndOGTags(title string, body templ.Component, challenge string, rules *config.ChallengeRules, ogTags map[string]string) (templ.Component, error) { 14 | return base(title, body, struct { 15 | Challenge string `json:"challenge"` 16 | Rules *config.ChallengeRules `json:"rules"` 17 | }{ 18 | Challenge: challenge, 19 | Rules: rules, 20 | }, ogTags), nil 21 | } 22 | 23 | func Index() templ.Component { 24 | return index() 25 | } 26 | 27 | func ErrorPage(msg string, mail string) templ.Component { 28 | return errorPage(msg, mail) 29 | } 30 | 31 | func Bench() templ.Component { 32 | return bench() 33 | } 34 | -------------------------------------------------------------------------------- /web/js/bench.mjs: -------------------------------------------------------------------------------- 1 | import processFast from "./proof-of-work.mjs"; 2 | import processSlow 
from "./proof-of-work-slow.mjs"; 3 | 4 | const defaultDifficulty = 4; 5 | const algorithms = { 6 | fast: processFast, 7 | slow: processSlow, 8 | }; 9 | 10 | const status = document.getElementById("status"); 11 | const difficultyInput = document.getElementById("difficulty-input"); 12 | const algorithmSelect = document.getElementById("algorithm-select"); 13 | const compareSelect = document.getElementById("compare-select"); 14 | const header = document.getElementById("table-header"); 15 | const headerCompare = document.getElementById("table-header-compare"); 16 | const results = document.getElementById("results"); 17 | 18 | const setupControls = () => { 19 | difficultyInput.value = defaultDifficulty; 20 | for (const alg of Object.keys(algorithms)) { 21 | const option1 = document.createElement("option"); 22 | algorithmSelect.append(option1); 23 | const option2 = document.createElement("option"); 24 | compareSelect.append(option2); 25 | option1.value = option1.innerText = option2.value = option2.innerText = alg; 26 | } 27 | }; 28 | 29 | const benchmarkTrial = async (stats, difficulty, algorithm, signal) => { 30 | if (!(difficulty >= 1)) { 31 | throw new Error(`Invalid difficulty: ${difficulty}`); 32 | } 33 | const process = algorithms[algorithm]; 34 | if (process == null) { 35 | throw new Error(`Unknown algorithm: ${algorithm}`); 36 | } 37 | 38 | const rawChallenge = new Uint8Array(32); 39 | crypto.getRandomValues(rawChallenge); 40 | const challenge = Array.from(rawChallenge) 41 | .map((c) => c.toString(16).padStart(2, "0")) 42 | .join(""); 43 | 44 | const t0 = performance.now(); 45 | const { hash, nonce } = await process(challenge, Number(difficulty), signal); 46 | const t1 = performance.now(); 47 | console.log({ hash, nonce }); 48 | 49 | stats.time += t1 - t0; 50 | stats.iters += nonce; 51 | 52 | return { time: t1 - t0, nonce }; 53 | }; 54 | 55 | const stats = { time: 0, iters: 0 }; 56 | const comparison = { time: 0, iters: 0 }; 57 | const updateStatus = () => { 58 | 
// Refresh the status line with the running average hashrate (in kH/s) for
// the main algorithm and, when comparison data exists, the comparison rate
// plus the percentage difference between the two.
const updateStatus = () => {
  // iters counts hash attempts and time is in milliseconds, so iters/time
  // works out to kilohashes per second.
  const mainRate = stats.iters / stats.time;
  const compareRate = comparison.iters / comparison.time;
  // Before the first trial both counters are 0, so the rate is 0/0 = NaN and
  // we fall through to the placeholder text.
  if (Number.isFinite(mainRate)) {
    status.innerText = `Average hashrate: ${mainRate.toFixed(3)}kH/s`;
    if (Number.isFinite(compareRate)) {
      const change = ((mainRate - compareRate) / mainRate) * 100;
      status.innerText += ` vs ${compareRate.toFixed(3)}kH/s (${change.toFixed(2)}% change)`;
    }
  } else {
    status.innerText = "Benchmarking...";
  }
};

// Build one padded <td> cell for the results table.
const tableCell = (text) => {
  const td = document.createElement("td");
  td.innerText = text;
  td.style.padding = "0 0.25rem";
  return td;
};

// Run benchmark trials back to back until the controller aborts, appending
// one table row per trial. The control values are re-read on every iteration
// so user changes take effect on the next trial.
const benchmarkLoop = async (controller) => {
  const difficulty = difficultyInput.value;
  const algorithm = algorithmSelect.value;
  const compareAlgorithm = compareSelect.value;
  updateStatus();

  try {
    const { time, nonce } = await benchmarkTrial(
      stats,
      difficulty,
      algorithm,
      controller.signal,
    );

    // display: contents lets the row's cells participate directly in the
    // table's CSS grid layout.
    const tr = document.createElement("tr");
    tr.style.display = "contents";
    tr.append(tableCell(`${time}ms`), tableCell(nonce));

    // auto-scroll to new rows
    const atBottom =
      results.scrollHeight - results.clientHeight <= results.scrollTop;
    results.append(tr);
    if (atBottom) {
      results.scrollTop = results.scrollHeight - results.clientHeight;
    }
    updateStatus();

    // Optionally run the comparison algorithm on the same settings and add
    // its columns to the same row.
    if (compareAlgorithm !== "NONE") {
      const { time, nonce } = await benchmarkTrial(
        comparison,
        difficulty,
        compareAlgorithm,
        controller.signal,
      );
      tr.append(tableCell(`${time}ms`), tableCell(nonce));
    }
  } catch (e) {
    // An aborted proof-of-work run rejects with the literal `false`;
    // anything else is a real error worth showing to the user.
    if (e !== false) {
      status.innerText = e;
    }
    return;
  }

  // Reschedule without awaiting: each completed trial kicks off the next.
  benchmarkLoop(controller);
};
// Single-threaded proof-of-work implementation, run inside a Web Worker so
// the UI thread stays responsive. Resolves with the worker's result object
// ({ hash, data, difficulty, nonce }), reports progress (a raw nonce count)
// through progressCallback, and rejects with the literal `false` when the
// AbortSignal fires so callers can tell an abort from a real error.
// _threads is accepted only for interface parity with the fast
// implementation and is ignored.
export default function process(
  data,
  difficulty = 5,
  signal = null,
  progressCallback = null,
  _threads = 1,
) {
  console.debug("slow algo");
  return new Promise((resolve, reject) => {
    // Serialize the worker body into a Blob URL so no separate worker
    // script file needs to be served.
    let webWorkerURL = URL.createObjectURL(new Blob([
      '(', processTask(), ')()'
    ], { type: 'application/javascript' }));

    let worker = new Worker(webWorkerURL);
    const terminate = () => {
      worker.terminate();
      if (signal != null) {
        // clean up listener to avoid memory leak
        signal.removeEventListener("abort", terminate);
        if (signal.aborted) {
          console.log("PoW aborted");
          reject(false);
        }
      }
    };
    if (signal != null) {
      signal.addEventListener("abort", terminate, { once: true });
    }

    worker.onmessage = (event) => {
      // The worker posts plain numbers as progress updates and an object as
      // the final result.
      if (typeof event.data === "number") {
        progressCallback?.(event.data);
      } else {
        terminate();
        resolve(event.data);
      }
    };

    worker.onerror = (event) => {
      terminate();
      reject(event);
    };

    worker.postMessage({
      data,
      difficulty
    });

    // The worker has already been created from the URL, so the blob can be
    // released immediately.
    URL.revokeObjectURL(webWorkerURL);
  });
}
// Returns the worker body as a source string so process() can wrap it in a
// Blob URL. The worker brute-forces a nonce such that sha256(data + nonce)
// starts with `difficulty` hex zeroes, posting the current nonce every 1024
// iterations as a progress update and the final result object when done.
function processTask() {
  return function () {
    // Hash text with SubtleCrypto and render the digest as lowercase hex.
    const sha256 = (text) => {
      const encoded = new TextEncoder().encode(text);
      return crypto.subtle.digest("SHA-256", encoded.buffer)
        .then((result) =>
          Array.from(new Uint8Array(result))
            .map((c) => c.toString(16).padStart(2, "0"))
            .join(""),
        );
    };

    addEventListener('message', async (event) => {
      let data = event.data.data;
      let difficulty = event.data.difficulty;

      let hash;
      let nonce = 0;
      do {
        // BUG FIX: this used to read `nonce & 1023 === 0`, but `===` binds
        // tighter than `&`, so it evaluated as `nonce & (1023 === 0)` which
        // is always 0 (falsy) — progress updates were never sent.
        if ((nonce & 1023) === 0) {
          postMessage(nonce);
        }
        hash = await sha256(data + nonce++);
      } while (hash.substring(0, difficulty) !== Array(difficulty + 1).join('0'));

      nonce -= 1; // last nonce was post-incremented

      postMessage({
        hash,
        data,
        difficulty,
        nonce,
      });
    });
  }.toString();
}
// Multi-threaded proof-of-work implementation. Spawns `threads` Web Workers
// that stride disjoint nonce ranges; the first worker to find a valid hash
// wins and all workers are terminated. Resolves with the winning worker's
// result object ({ hash, data, difficulty, nonce }), reports progress through
// progressCallback, and rejects with the literal `false` on abort so callers
// can tell an abort from a real error.
export default function process(
  data,
  difficulty = 5,
  signal = null,
  progressCallback = null,
  threads = (navigator.hardwareConcurrency || 1),
) {
  console.debug("fast algo");
  return new Promise((resolve, reject) => {
    // Serialize the worker body into a Blob URL so no separate worker
    // script file needs to be served.
    let webWorkerURL = URL.createObjectURL(new Blob([
      '(', processTask(), ')()'
    ], { type: 'application/javascript' }));

    const workers = [];
    const terminate = () => {
      workers.forEach((w) => w.terminate());
      if (signal != null) {
        // clean up listener to avoid memory leak
        signal.removeEventListener("abort", terminate);
        if (signal.aborted) {
          console.log("PoW aborted");
          reject(false);
        }
      }
    };
    if (signal != null) {
      signal.addEventListener("abort", terminate, { once: true });
    }

    for (let i = 0; i < threads; i++) {
      let worker = new Worker(webWorkerURL);

      worker.onmessage = (event) => {
        // Workers post plain numbers as progress updates and an object as
        // the final result.
        if (typeof event.data === "number") {
          progressCallback?.(event.data);
        } else {
          terminate();
          resolve(event.data);
        }
      };

      worker.onerror = (event) => {
        terminate();
        reject(event);
      };

      // Worker i starts at nonce i and advances by `threads`, so together
      // the workers cover all nonces without overlapping work.
      worker.postMessage({
        data,
        difficulty,
        nonce: i,
        threads,
      });

      workers.push(worker);
    }

    // All workers have been created from the URL, so the blob can be
    // released immediately.
    URL.revokeObjectURL(webWorkerURL);
  });
}
// Returns the worker body as a source string so process() can wrap it in a
// Blob URL. Each worker starts at its own nonce (its thread id) and strides
// by `threads`, checking whether sha256(data + nonce) begins with
// `difficulty` hex zeroes, and posts the result object when it finds one.
function processTask() {
  return function () {
    // Hash text with SubtleCrypto; resolves with the raw ArrayBuffer digest.
    const sha256 = (text) => {
      const encoded = new TextEncoder().encode(text);
      return crypto.subtle.digest("SHA-256", encoded.buffer);
    };

    function uint8ArrayToHexString(arr) {
      return Array.from(arr)
        .map((c) => c.toString(16).padStart(2, "0"))
        .join("");
    }

    addEventListener('message', async (event) => {
      let data = event.data.data;
      let difficulty = event.data.difficulty;
      let hash;
      let nonce = event.data.nonce;
      let threads = event.data.threads;

      const threadId = nonce;

      while (true) {
        const currentHash = await sha256(data + nonce);
        const thisHash = new Uint8Array(currentHash);
        let valid = true;

        // Check the first `difficulty` hex digits (nibbles) are all zero,
        // working directly on the digest bytes to avoid hex-encoding every
        // candidate hash.
        for (let j = 0; j < difficulty; j++) {
          const byteIndex = Math.floor(j / 2); // which byte we are looking at
          const nibbleIndex = j % 2; // which nibble in the byte we are looking at (0 is high, 1 is low)

          let nibble = (thisHash[byteIndex] >> (nibbleIndex === 0 ? 4 : 0)) & 0x0F; // Get the nibble

          if (nibble !== 0) {
            valid = false;
            break;
          }
        }

        if (valid) {
          hash = uint8ArrayToHexString(thisHash);
          console.log(hash);
          break;
        }

        const oldNonce = nonce;
        nonce += threads;

        // send a progress update every 1024 iterations. since each thread checks
        // separate values, one simple way to do this is by bit masking the
        // nonce for multiples of 1024. unfortunately, if the number of threads
        // is not prime, only some of the threads will be sending the status
        // update and they will get behind the others. this is slightly more
        // complicated but ensures an even distribution between threads.
        //
        // BUG FIX: this used to read `nonce > oldNonce | 1023`, which parses
        // as `(nonce > oldNonce) | 1023` and is therefore always truthy.
        // `oldNonce | 1023` is the last nonce in oldNonce's block of 1024,
        // so exceeding it means we have crossed into a new block.
        if (
          nonce > (oldNonce | 1023) && // we've wrapped past 1024
          (nonce >> 10) % threads === threadId // and it's our turn
        ) {
          postMessage(nonce);
        }
      }

      postMessage({
        hash,
        data,
        difficulty,
        nonce,
      });
    });
  }.toString();
}
-------------------------------------------------------------------------------- /web/static/img/pensive.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/web/static/img/pensive.webp -------------------------------------------------------------------------------- /web/static/img/reject.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/web/static/img/reject.webp -------------------------------------------------------------------------------- /web/static/js/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore -------------------------------------------------------------------------------- /web/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: AI2Bot 2 | User-agent: Ai2Bot-Dolma 3 | User-agent: Amazonbot 4 | User-agent: anthropic-ai 5 | User-agent: Applebot 6 | User-agent: Applebot-Extended 7 | User-agent: Bytespider 8 | User-agent: CCBot 9 | User-agent: ChatGPT-User 10 | User-agent: Claude-Web 11 | User-agent: ClaudeBot 12 | User-agent: cohere-ai 13 | User-agent: cohere-training-data-crawler 14 | User-agent: Diffbot 15 | User-agent: DuckAssistBot 16 | User-agent: FacebookBot 17 | User-agent: FriendlyCrawler 18 | User-agent: Google-Extended 19 | User-agent: GoogleOther 20 | User-agent: GoogleOther-Image 21 | User-agent: GoogleOther-Video 22 | User-agent: GPTBot 23 | User-agent: iaskspider/2.0 24 | User-agent: ICC-Crawler 25 | User-agent: ImagesiftBot 26 | User-agent: img2dataset 27 | User-agent: ISSCyberRiskCrawler 28 | User-agent: Kangaroo Bot 29 | User-agent: Meta-ExternalAgent 30 | User-agent: Meta-ExternalFetcher 31 | User-agent: OAI-SearchBot 32 | User-agent: omgili 33 | User-agent: 
// PostCSS pipeline used by build.sh to produce xess.min.css:
// cssnano minifies the stylesheet and postcss-url inlines referenced assets
// (the woff2 fonts) as data URIs so the output is a single self-contained
// file.
module.exports = {
  plugins: [
    require("cssnano")({
      // "advanced" enables cssnano's extra optimizations (rule merging,
      // discarding overridden declarations, etc.).
      preset: "advanced",
    }),
    require("postcss-url")({ url: "inline" }),
  ],
};
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/xess/static/iosevka-curly.woff2 -------------------------------------------------------------------------------- /xess/static/podkova.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: "Podkova"; 3 | font-style: normal; 4 | font-weight: 400 800; 5 | font-display: swap; 6 | src: url("podkova.woff2") format("woff2"); 7 | } 8 | -------------------------------------------------------------------------------- /xess/static/podkova.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TecharoHQ/anubis/30b0ba805533fedacc397c0b8a857b58046a3029/xess/static/podkova.woff2 -------------------------------------------------------------------------------- /xess/xess.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --body-sans-font: Geist, sans-serif; 3 | --body-preformatted-font: Iosevka Curly Iaso, monospace; 4 | --body-title-font: Podkova, serif; 5 | 6 | --dark-background: #1d2021; 7 | --dark-text: #f9f5d7; 8 | --dark-text-selection: #d3869b; 9 | --dark-preformatted-background: #3c3836; 10 | --dark-link-foreground: #b16286; 11 | --dark-link-background: #282828; 12 | --dark-blockquote-border-left: 1px solid #bdae93; 13 | 14 | --light-background: #f9f5d7; 15 | --light-text: #1d2021; 16 | --light-text-selection: #d3869b; 17 | --light-preformatted-background: #ebdbb2; 18 | --light-link-foreground: #b16286; 19 | --light-link-background: #fbf1c7; 20 | --light-blockquote-border-left: 1px solid #655c54; 21 | 22 | --progress-bar-outline: #b16286 solid 4px; 23 | --progress-bar-fill: #b16286; 24 | } 25 | 26 | @font-face { 27 | font-family: "Geist"; 28 | font-style: normal; 29 | font-weight: 100 900; 30 | font-display: swap; 31 | src: 
@font-face {
  font-family: "Geist";
  font-style: normal;
  font-weight: 100 900;
  font-display: swap;
  src: url("./static/geist.woff2") format("woff2");
}

@font-face {
  font-family: "Podkova";
  font-style: normal;
  font-weight: 400 800;
  font-display: swap;
  src: url("./static/podkova.woff2") format("woff2");
}

/* FIX: the family name now matches --body-preformatted-font ("Iosevka Curly
   Iaso"); it was previously declared as "Iosevka Curly", so the variable
   never resolved to this face and <pre> fell back to generic monospace.
   Also, font-style only accepts normal/italic/oblique — the previous value
   "monospace" was invalid and ignored by the parser. */
@font-face {
  font-family: "Iosevka Curly Iaso";
  font-style: normal;
  font-display: swap;
  src: url("./static/iosevka-curly.woff2") format("woff2");
}

main {
  font-family: var(--body-sans-font);
  max-width: 50rem;
  padding: 2rem;
  margin: auto;
}

::selection {
  background: var(--dark-text-selection);
}

body {
  background: var(--dark-background);
  color: var(--dark-text);
}

body,
html {
  height: 100%;
  display: flex;
  justify-content: center;
  align-items: center;
  margin-left: auto;
  margin-right: auto;
}

/* FIX: these two rules appeared twice verbatim; the duplicates were removed. */
.centered-div {
  text-align: center;
}

#status {
  font-variant-numeric: tabular-nums;
}

#progress {
  display: none;
  width: min(20rem, 90%);
  height: 2rem;
  border-radius: 1rem;
  overflow: hidden;
  margin: 1rem 0 2rem;
  outline-offset: 2px;
  outline: var(--progress-bar-outline);
}

/* FIX: the width transition used to be declared unconditionally here AND
   inside the prefers-reduced-motion: no-preference query below, so users who
   asked for reduced motion still got the animation. It now lives only in the
   media query. */
.bar-inner {
  background-color: var(--progress-bar-fill);
  height: 100%;
  width: 0;
}

@media (prefers-reduced-motion: no-preference) {
  .bar-inner {
    transition: width 0.25s ease-in;
  }
}

pre {
  background-color: var(--dark-preformatted-background);
  padding: 1em;
  border: 0;
  font-family: var(--body-preformatted-font);
}

a,
a:active,
a:visited {
  color: var(--dark-link-foreground);
  background-color: var(--dark-link-background);
}
/* Headings use the title font with tight bottom spacing. */
h1,
h2,
h3,
h4,
h5 {
  margin-bottom: 0.1rem;
  font-family: var(--body-title-font);
}

blockquote {
  border-left: var(--dark-blockquote-border-left);
  margin: 0.5em 10px;
  padding: 0.5em 10px;
}

footer {
  text-align: center;
}

/* Light theme: mirrors the dark-theme rules above using the light palette
   variables defined in :root. */
@media (prefers-color-scheme: light) {
  ::selection {
    background: var(--light-text-selection);
  }

  body {
    background: var(--light-background);
    color: var(--light-text);
  }

  pre {
    background-color: var(--light-preformatted-background);
    padding: 1em;
    border: 0;
  }

  a,
  a:active,
  a:visited {
    color: var(--light-link-foreground);
    background-color: var(--light-link-background);
  }

  h1,
  h2,
  h3,
  h4,
  h5 {
    margin-bottom: 0.1rem;
  }

  blockquote {
    border-left: var(--light-blockquote-border-left);
    margin: 0.5em 10px;
    padding: 0.5em 10px;
  }
}
4 | package xess 5 | 6 | import ( 7 | "embed" 8 | "net/http" 9 | "path/filepath" 10 | 11 | "github.com/TecharoHQ/anubis" 12 | "github.com/TecharoHQ/anubis/internal" 13 | ) 14 | 15 | //go:generate go tool github.com/a-h/templ/cmd/templ generate 16 | 17 | var ( 18 | //go:embed *.css static 19 | Static embed.FS 20 | 21 | URL = "/.within.website/x/xess/xess.css" 22 | ) 23 | 24 | func init() { 25 | Mount(http.DefaultServeMux) 26 | 27 | //goland:noinspection GoBoolExpressions 28 | if anubis.Version != "devel" { 29 | URL = filepath.Join(filepath.Dir(URL), "xess.min.css") 30 | } 31 | 32 | URL = URL + "?cachebuster=" + anubis.Version 33 | } 34 | 35 | func Mount(mux *http.ServeMux) { 36 | mux.Handle("/.within.website/x/xess/", internal.UnchangingCache(http.StripPrefix("/.within.website/x/xess/", http.FileServerFS(Static)))) 37 | } 38 | -------------------------------------------------------------------------------- /xess/xess.templ: -------------------------------------------------------------------------------- 1 | package xess 2 | 3 | templ Base(title string, headArea, navBar, bodyArea, footer templ.Component) { 4 | 5 | 6 | 7 | { title } 8 | 9 | 10 | if headArea != nil { 11 | @headArea 12 | } 13 | 14 | 15 |
16 | if navBar != nil { 17 | 20 | } 21 |

{ title }

22 | @bodyArea 23 | if footer != nil { 24 | 27 | } 28 |
// yeetfile.js drives the `yeet` build DSL ($, deb, rpm, tarball, git, yeet,
// file are globals it injects) to produce Anubis release artifacts.

// Build the frontend assets (esbuild bundles, compressed variants, CSS)
// first so they can be embedded into the Go binary.
$`npm run assets`;

// Produce deb, rpm, and tarball packages for each supported architecture.
["amd64", "arm64", "riscv64"].forEach(goarch => {
  [deb, rpm, tarball].forEach(method => method.build({
    name: "anubis",
    description: "Anubis weighs the souls of incoming HTTP requests and uses a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots.",
    homepage: "https://anubis.techaro.lol",
    license: "MIT",
    goarch,

    // Files shipped alongside the binary as package documentation.
    // NOTE(review): "./docs/docs/admin/policies.md" — the docs tree contains
    // policies.mdx; confirm this path still resolves.
    documentation: {
      "./README.md": "README.md",
      "./LICENSE": "LICENSE",
      "./docs/docs/CHANGELOG.md": "CHANGELOG.md",
      "./docs/docs/admin/policies.md": "policies.md",
      "./docs/docs/admin/native-install.mdx": "native-install.mdx",
      "./data/botPolicies.json": "botPolicies.json",
    },

    build: ({ bin, etc, systemd, out }) => {
      // Fully static binary; -s -w strips debug info and -X stamps the
      // release version from the current git tag.
      $`go build -o ${bin}/anubis -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/anubis`;

      file.install("./run/anubis@.service", `${systemd}/anubis@.service`);
      file.install("./run/default.env", `${etc}/default.env`);
    },
  }));
});

// NOTE(Xe): Fixes #217. This is a "half baked" tarball that includes the harder
// parts for deterministic distros already done. Distributions like NixOS, Gentoo
// and *BSD ports have a difficult time fitting the square peg of their dependency
// model into the bazarr of round holes that various modern languages use. Needless
// to say, this makes adoption easier.
tarball.build({
  name: "anubis-src-vendor",
  license: "MIT",
  // XXX(Xe): This is needed otherwise go will be very sad.
  platform: yeet.goos,
  goarch: yeet.goarch,

  build: ({ out }) => {
    // prepare clean checkout in $out
    $`git archive --format=tar HEAD | tar xC ${out}`;
    // vendor Go dependencies
    $`cd ${out} && go mod vendor`;
    // write VERSION file
    $`echo ${git.tag()} > ${out}/VERSION`;
  },

  mkFilename: ({ name, version }) => `${name}-${version}`,
});

// Same as the vendor tarball above, but with the NPM-built assets
// pre-generated as well, so consumers need neither node nor npm at build
// time.
tarball.build({
  name: "anubis-src-vendor-npm",
  license: "MIT",
  // XXX(Xe): This is needed otherwise go will be very sad.
  platform: yeet.goos,
  goarch: yeet.goarch,

  build: ({ out }) => {
    // prepare clean checkout in $out
    $`git archive --format=tar HEAD | tar xC ${out}`;
    // vendor Go dependencies
    $`cd ${out} && go mod vendor`;
    // build NPM-bound dependencies
    $`cd ${out} && npm ci && npm run assets && rm -rf node_modules`
    // write VERSION file
    $`echo ${git.tag()} > ${out}/VERSION`;
  },

  mkFilename: ({ name, version }) => `${name}-${version}`,
});