├── .github └── workflows │ └── publish.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── VERSION ├── docker ├── Dockerfile ├── docker-compose.dev.yml └── docker-compose.yml ├── example_sherpa-dns.yaml ├── pyproject.toml ├── renovate.json ├── requirements.txt └── sherpa_dns ├── __init__.py ├── __main__.py ├── config └── config.py ├── controller ├── __init__.py ├── controller.py └── plan.py ├── models ├── __init__.py └── models.py ├── provider └── cloudflare.py ├── registry └── txt_registry.py ├── source └── docker_container.py └── utils ├── __init__.py ├── cleanup_tracker.py └── health.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker Image 2 | 3 | on: 4 | pull_request: 5 | types: [closed] 6 | branches: 7 | - main 8 | workflow_dispatch: 9 | inputs: 10 | bump_level: 11 | description: 'Select the version bump level' 12 | required: true 13 | type: choice 14 | options: 15 | - patch 16 | - minor 17 | - major 18 | default: 'patch' 19 | 20 | permissions: 21 | contents: write # Needed to push version bump commits and tags 22 | packages: write # Needed to push docker image to GHCR 23 | 24 | jobs: 25 | publish_from_pr: 26 | name: Publish Docker Image from PR 27 | # Only run if PR was merged and has exactly one version label 28 | if: |- 29 | github.event.pull_request.merged == true && 30 | github.event_name == 'pull_request' && 31 | (contains(github.event.pull_request.labels.*.name, 'major') || 32 | contains(github.event.pull_request.labels.*.name, 'minor') || 33 | contains(github.event.pull_request.labels.*.name, 'patch')) 34 | runs-on: ubuntu-latest 35 | steps: 36 | - name: Checkout repository based on PR merge commit 37 | uses: actions/checkout@v5 38 | with: 39 | # Fetch all history and tags for version bumping/tagging 40 | fetch-depth: 0 41 | # Checkout the merge commit 42 | ref: ${{ github.event.pull_request.merge_commit_sha }} 43 | 44 | - name: Determine Version Bump 
Type from PR Label 45 | id: version_bump 46 | run: | 47 | PR_NUMBER=${{ github.event.pull_request.number }} 48 | REPO="${{ github.repository }}" 49 | API_URL="/repos/$REPO/pulls/$PR_NUMBER" 50 | 51 | echo "Fetching label counts for PR #$PR_NUMBER in repo $REPO" 52 | 53 | # Use gh api with jq to count exact matches for each label type 54 | major=$(gh api --jq '[.labels.[].name] | map(select(. == "major")) | length' "$API_URL") 55 | minor=$(gh api --jq '[.labels.[].name] | map(select(. == "minor")) | length' "$API_URL") 56 | patch=$(gh api --jq '[.labels.[].name] | map(select(. == "patch")) | length' "$API_URL") 57 | 58 | echo "Counts - Major: $major, Minor: $minor, Patch: $patch" 59 | 60 | total=$((major + minor + patch)) 61 | 62 | if [ "$total" -ne 1 ]; then 63 | echo "Error: PR must have exactly one version label (major, minor, or patch). Found $total matching version labels (Major: $major, Minor: $minor, Patch: $patch)." 64 | exit 1 65 | fi 66 | 67 | if [ "$major" -eq 1 ]; then 68 | echo "bump_type=major" 69 | elif [ "$minor" -eq 1 ]; then 70 | echo "bump_type=minor" 71 | elif [ "$patch" -eq 1 ]; then 72 | echo "bump_type=patch" 73 | # This else should theoretically not be reached due to the total check 74 | else 75 | echo "Error: Logic error determining bump type despite total count being 1." 76 | exit 1 77 | fi >> $GITHUB_OUTPUT 78 | env: 79 | # GITHUB_TOKEN is needed for gh api calls 80 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 81 | 82 | - name: Calculate Next Version 83 | id: calc_version 84 | run: | 85 | BUMP_TYPE=${{ steps.version_bump.outputs.bump_type }} 86 | CURRENT_VERSION=$(cat VERSION) 87 | echo "Current version: $CURRENT_VERSION" 88 | echo "Bump type: $BUMP_TYPE" 89 | 90 | # Use awk for shell-based SemVer bumping 91 | current_major=$(echo $CURRENT_VERSION | awk -F. '{print $1}') 92 | current_minor=$(echo $CURRENT_VERSION | awk -F. '{print $2}') 93 | current_patch=$(echo $CURRENT_VERSION | awk -F. 
'{print $3}') 94 | 95 | if [ "$BUMP_TYPE" == "major" ]; then 96 | new_major=$((current_major + 1)) 97 | new_minor=0 98 | new_patch=0 99 | elif [ "$BUMP_TYPE" == "minor" ]; then 100 | new_major=$current_major 101 | new_minor=$((current_minor + 1)) 102 | new_patch=0 103 | else # patch 104 | new_major=$current_major 105 | new_minor=$current_minor 106 | new_patch=$((current_patch + 1)) 107 | fi 108 | 109 | NEW_VERSION="${new_major}.${new_minor}.${new_patch}" 110 | echo "New version: $NEW_VERSION" 111 | echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT 112 | 113 | - name: Update VERSION file 114 | run: | 115 | echo "${{ steps.calc_version.outputs.new_version }}" > VERSION 116 | cat VERSION 117 | 118 | - name: Update docker-compose.yml with new version 119 | run: | 120 | echo "Updating docker-compose.yml with version ${{ steps.calc_version.outputs.new_version }}" 121 | sed -i -E "s|(image: ghcr.io/stedrow/sherpa-dns:)[^[:space:]]+|\\1${{ steps.calc_version.outputs.new_version }}|g" docker/docker-compose.yml 122 | echo "Contents of docker/docker-compose.yml after update:" 123 | cat docker/docker-compose.yml 124 | 125 | - name: Commit and Push Version Update from PR 126 | run: | 127 | git config --global user.name 'github-actions[bot]' 128 | git config --global user.email 'github-actions[bot]@users.noreply.github.com' 129 | git add VERSION docker/docker-compose.yml 130 | # Use merge commit sha in message for traceability 131 | COMMIT_SHA=${{ github.event.pull_request.merge_commit_sha }} 132 | git commit -m "chore: Bump version to ${{ steps.calc_version.outputs.new_version }} for merge $COMMIT_SHA" 133 | # Push directly to the base branch 134 | BASE_REF=${{ github.event.pull_request.base.ref }} 135 | echo "Pushing version update to $BASE_REF" 136 | git push origin HEAD:$BASE_REF 137 | 138 | - name: Create Git Tag 139 | run: | 140 | VERSION="v${{ steps.calc_version.outputs.new_version }}" 141 | echo "Creating tag $VERSION" 142 | git tag $VERSION 143 | git push origin 
$VERSION 144 | 145 | - name: Set up QEMU 146 | uses: docker/setup-qemu-action@v3 147 | 148 | - name: Set up Docker Buildx 149 | uses: docker/setup-buildx-action@v3 150 | 151 | - name: Log in to GitHub Container Registry 152 | uses: docker/login-action@v3 153 | with: 154 | registry: ghcr.io 155 | username: ${{ github.actor }} 156 | password: ${{ secrets.GITHUB_TOKEN }} 157 | 158 | - name: Docker meta 159 | id: meta 160 | uses: docker/metadata-action@v5 161 | with: 162 | images: ghcr.io/${{ github.repository }} 163 | tags: | 164 | type=semver,pattern={{version}},value=v${{ steps.calc_version.outputs.new_version }} 165 | type=semver,pattern={{major}}.{{minor}},value=v${{ steps.calc_version.outputs.new_version }} 166 | type=semver,pattern={{major}},value=v${{ steps.calc_version.outputs.new_version }} 167 | type=sha 168 | # Add latest tag only for default branch (main) 169 | type=raw,value=latest,enable=${{ github.event.pull_request.base.ref == 'main' }} 170 | 171 | - name: Build and push Docker image 172 | uses: docker/build-push-action@v6 173 | with: 174 | context: . 
175 |           file: ./docker/Dockerfile 176 |           platforms: linux/amd64,linux/arm64 177 |           push: true 178 |           tags: ${{ steps.meta.outputs.tags }} 179 |           labels: ${{ steps.meta.outputs.labels }} 180 |           cache-from: type=gha 181 |           cache-to: type=gha,mode=max 182 | 183 |   publish_manually: 184 |     name: Publish Docker Image Manually 185 |     runs-on: ubuntu-latest 186 |     # Only run on workflow_dispatch trigger 187 |     if: github.event_name == 'workflow_dispatch' 188 |     steps: 189 |       - name: Checkout main branch 190 |         uses: actions/checkout@v5 191 |         with: 192 |           # Fetch all history and tags for version bumping/tagging 193 |           fetch-depth: 0 194 |           ref: 'main' 195 | 196 |       # Version Bump Type is determined by the workflow_dispatch input 197 |       - name: Calculate Next Version 198 |         id: calc_version 199 |         run: | 200 |           CURRENT_VERSION=$(cat VERSION) 201 |           BUMP_TYPE=${{ github.event.inputs.bump_level }} 202 |           echo "Current version: $CURRENT_VERSION" 203 |           echo "Manual bump type: $BUMP_TYPE" 204 | 205 |           # Use awk for shell-based SemVer bumping 206 |           current_major=$(echo $CURRENT_VERSION | awk -F. '{print $1}') 207 |           current_minor=$(echo $CURRENT_VERSION | awk -F. '{print $2}') 208 |           current_patch=$(echo $CURRENT_VERSION | awk -F. 
'{print $3}') 209 | 210 | if [ "$BUMP_TYPE" == "major" ]; then 211 | new_major=$((current_major + 1)) 212 | new_minor=0 213 | new_patch=0 214 | elif [ "$BUMP_TYPE" == "minor" ]; then 215 | new_major=$current_major 216 | new_minor=$((current_minor + 1)) 217 | new_patch=0 218 | else # patch 219 | new_major=$current_major 220 | new_minor=$current_minor 221 | new_patch=$((current_patch + 1)) 222 | fi 223 | 224 | NEW_VERSION="${new_major}.${new_minor}.${new_patch}" 225 | echo "New version: $NEW_VERSION" 226 | echo "new_version=$NEW_VERSION" >> $GITHUB_OUTPUT 227 | 228 | - name: Update VERSION file 229 | run: | 230 | echo "${{ steps.calc_version.outputs.new_version }}" > VERSION 231 | cat VERSION 232 | 233 | - name: Update docker-compose.yml with new version 234 | run: | 235 | echo "Updating docker-compose.yml with version ${{ steps.calc_version.outputs.new_version }}" 236 | sed -i -E "s|(image: ghcr.io/stedrow/sherpa-dns:)[^[:space:]]+|\\1${{ steps.calc_version.outputs.new_version }}|g" docker/docker-compose.yml 237 | echo "Contents of docker/docker-compose.yml after update:" 238 | cat docker/docker-compose.yml 239 | 240 | - name: Commit and Push Version Update from Manual Trigger 241 | run: | 242 | git config --global user.name 'github-actions[bot]' 243 | git config --global user.email 'github-actions[bot]@users.noreply.github.com' 244 | git add VERSION docker/docker-compose.yml 245 | git commit -m "chore(release): Bump version to ${{ steps.calc_version.outputs.new_version }} (manual trigger)" 246 | # Push directly to main branch 247 | echo "Pushing version update to main" 248 | git push origin HEAD:refs/heads/main 249 | 250 | - name: Create Git Tag 251 | run: | 252 | VERSION="v${{ steps.calc_version.outputs.new_version }}" 253 | echo "Creating tag $VERSION" 254 | git tag $VERSION 255 | git push origin $VERSION 256 | 257 | - name: Set up QEMU 258 | uses: docker/setup-qemu-action@v3 259 | 260 | - name: Set up Docker Buildx 261 | uses: docker/setup-buildx-action@v3 262 | 
263 | - name: Log in to GitHub Container Registry 264 | uses: docker/login-action@v3 265 | with: 266 | registry: ghcr.io 267 | username: ${{ github.actor }} 268 | password: ${{ secrets.GITHUB_TOKEN }} 269 | 270 | - name: Docker meta 271 | id: meta 272 | uses: docker/metadata-action@v5 273 | with: 274 | images: ghcr.io/${{ github.repository }} 275 | tags: | 276 | type=semver,pattern={{version}},value=v${{ steps.calc_version.outputs.new_version }} 277 | type=semver,pattern={{major}}.{{minor}},value=v${{ steps.calc_version.outputs.new_version }} 278 | type=semver,pattern={{major}},value=v${{ steps.calc_version.outputs.new_version }} 279 | type=sha 280 | # Add latest tag since this is running on main 281 | type=raw,value=latest,enable=true 282 | 283 | - name: Build and push Docker image 284 | uses: docker/build-push-action@v6 285 | with: 286 | context: . 287 | file: ./docker/Dockerfile 288 | platforms: linux/amd64,linux/arm64 289 | push: true 290 | tags: ${{ steps.meta.outputs.tags }} 291 | labels: ${{ steps.meta.outputs.labels }} 292 | cache-from: type=gha 293 | cache-to: type=gha,mode=max -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *.so 5 | 6 | # Distribution / packaging 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | *.egg 11 | 12 | # Environment variables 13 | .env 14 | 15 | # Virtual Environments 16 | .venv/ 17 | env/ 18 | venv/ 19 | ENV/ 20 | 21 | # Test / coverage reports 22 | .coverage 23 | .coverage.* 24 | .cache 25 | htmlcov/ 26 | .pytest_cache/ 27 | 28 | # mypy 29 | .mypy_cache/ 30 | 31 | # IDE files 32 | .idea/ 33 | .vscode/ 34 | *.swp 35 | *.swo 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 
2025 Scott Tedrow 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: build run run-dev stop clean test lint format help 2 | 3 | # Docker image name 4 | IMAGE_NAME = sherpa-dns 5 | # Compose files 6 | COMPOSE_FILE = docker/docker-compose.yml 7 | COMPOSE_DEV_FILE = docker/docker-compose.dev.yml 8 | # Dockerfile location 9 | DOCKERFILE = docker/Dockerfile 10 | 11 | # Build the Docker image 12 | build: 13 | docker build -t $(IMAGE_NAME):latest -f $(DOCKERFILE) . 
14 | 15 | # Run the application in production mode (uses pre-built image from docker-compose.yml) 16 | run: 17 | # Note: This command uses the image specified in $(COMPOSE_FILE) 18 | # Ensure the image ghcr.io/stedrow/sherpa-dns: exists and the config is present 19 | docker compose --env-file .env -f $(COMPOSE_FILE) up -d 20 | 21 | # Run the application in development mode (builds locally, with logs) 22 | run-dev: 23 | # This command builds the image locally using $(COMPOSE_DEV_FILE) 24 | # --env-file .env ensures environment variables are loaded from the project root 25 | # --build forces a rebuild of the image 26 | docker compose --env-file .env -f $(COMPOSE_DEV_FILE) up --build 27 | 28 | # Stop the running application (stops services defined in either compose file) 29 | stop: 30 | # Attempts to stop services defined in both compose files, ignoring errors if one isn't running 31 | -docker compose -f $(COMPOSE_FILE) down 32 | -docker compose -f $(COMPOSE_DEV_FILE) down 33 | 34 | # Clean up Docker resources (removes locally built image) 35 | clean: stop 36 | docker rmi $(IMAGE_NAME):latest || true # Ignore error if image doesn't exist 37 | 38 | # Run linting 39 | lint: 40 | ruff check . 41 | black --check sherpa_dns 42 | 43 | # Run formatting and apply fixes 44 | format: 45 | isort sherpa_dns 46 | ruff check --fix . 
47 | black sherpa_dns 48 | 49 | # Show help 50 | help: 51 | @echo "Available commands:" 52 | @echo " make build - Build the local Docker image using $(DOCKERFILE)" 53 | @echo " make run - Run production app using pre-built image defined in $(COMPOSE_FILE)" 54 | @echo " make run-dev - Build locally and run dev app using $(COMPOSE_DEV_FILE) (loads .env)" 55 | @echo " make stop - Stop running application containers (prod or dev)" 56 | @echo " make clean - Stop containers and remove the locally built Docker image ($(IMAGE_NAME):latest)" 57 | @echo " make lint - Run linting checks" 58 | @echo " make format - Sort imports, apply lint fixes, and formatting" 59 | @echo " make help - Show this help message" 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sherpa-DNS 2 | 3 | Sherpa-DNS is a python application designed to create and manage DNS records for services defined in docker compose stacks or stand-alone docker containers via labels. It draws inspiration from the Kubernetes External-DNS project but is specifically tailored for docker environments. 4 | 5 | ## Table of Contents 6 | 7 | - [Installation](#installation) 8 | - [Using Docker Compose](#using-docker-compose-recommended) 9 | - [Configuration (`sherpa-dns.yaml`)](#configuration-sherpa-dnsyaml) 10 | - [`source` Section](#source-section) 11 | - [`provider` Section](#provider-section) 12 | - [`registry` Section](#registry-section) 13 | - [`controller` Section](#controller-section) 14 | - [`domains` Section](#domains-section) 15 | - [`logging` Section](#logging-section) 16 | - [Usage: Docker Labels](#usage-docker-labels) 17 | - [Label Schema](#label-schema) 18 | - [Examples](#examples) 19 | - [How it Works](#how-it-works) 20 | - [Development](#development) 21 | - [License](#license) 22 | 23 | ## Installation 24 | 25 | Sherpa-DNS runs as a Docker container. 
It needs read access to the Docker API to monitor container events. Because the Sherpa-DNS image is based on Chainguard and runs as a non-root user (`nonroot` UID 65532) for enhanced security, it cannot directly access the host's Docker socket (`/var/run/docker.sock`) due to permissions. 26 | 27 | To solve this securely, we rely on a dedicated [docker-socket-proxy](https://github.com/11notes/docker-socket-proxy) container. This proxy container runs with the necessary privileges to access the host's Docker socket. It then exposes a limited, read-only version of the Docker API which the non-root Sherpa-DNS container can safely connect to without needing direct access to the host socket itself. 28 | 29 | The recommended installation method using Docker Compose handles setting up this proxy automatically. 30 | 31 | ### Using Docker Compose 32 | 33 | This method uses the pre-built Sherpa-DNS image from GitHub Container Registry and includes the `docker-socket-proxy` for secure, non-root access to the Docker API. This is the easiest and most secure way to run Sherpa-DNS. 34 | 35 | 1. **Download the compose file:** Download `docker-compose.yml` from the [docker/ directory](https://github.com/stedrow/sherpa-dns/blob/main/docker/docker-compose.yml) of the Sherpa-DNS repository. 36 | 2. **Create `sherpa-dns.yaml`:** In the **same directory** where you saved `docker-compose.yml`, create your `sherpa-dns.yaml` configuration file. You can copy [`example_sherpa-dns.yaml`](https://github.com/stedrow/sherpa-dns/blob/main/example_sherpa-dns.yaml) as a starting point and modify it. 37 | 3. **Create `.env` file:** In the **same directory**, create an `.env` file with your Cloudflare API token and, optionally, your encryption key (if `registry.encrypt_txt` is `true` in your config): 38 | ```bash 39 | # .env file contents 40 | CLOUDFLARE_API_TOKEN=your_api_token_here 41 | ENCRYPTION_KEY=your_secret_passphrase_here # Only needed if registry.encrypt_txt=true 42 | ``` 43 | 4. 
**Run Docker Compose:** From the directory containing your `docker-compose.yml`, `sherpa-dns.yaml`, and `.env` file, run: 44 | ```bash 45 | docker compose up -d 46 | ``` 47 | This will start both the `docker-socket-proxy` and the `sherpa-dns` service. 48 | 49 | ## Configuration (`sherpa-dns.yaml`) 50 | 51 | Sherpa-DNS uses a YAML file (default: `sherpa-dns.yaml` passed as an argument, or looked for at `/config/sherpa-dns.yaml` inside the container) for configuration. Environment variables like `${VAR_NAME}` can be used and will be substituted from the container's environment (e.g., passed via `.env` or `-e`). 52 | 53 | You can use [example_sherpa-dns.yaml](https://github.com/stedrow/sherpa-dns/blob/main/example_sherpa-dns.yaml) as a starting point to create your own config. 54 | 55 | ### `source` Section 56 | 57 | Configures how Sherpa-DNS discovers target endpoints. 58 | 59 | * `label_prefix` (string): The prefix for Docker labels used by Sherpa-DNS. 60 | * Default: `"sherpa.dns"` 61 | * `label_filter` (string, optional): If set, Sherpa-DNS will only process containers that have a specific label matching this filter. 62 | * Format: Can be just a key (e.g., `"enable-sherpa"`) to check for the label's existence, or key=value (e.g., `"enable-sherpa=true"`) to check for a specific key and value. 63 | * Default: `""` (empty string, meaning no filtering - process all containers). 64 | 65 | ### `provider` Section 66 | 67 | Configures the DNS provider where records will be managed. 68 | 69 | * `name` (string): The name of the DNS provider. 70 | * Default: `"cloudflare"` (Currently the only supported provider). 71 | * `cloudflare`: A nested section containing Cloudflare-specific settings. 72 | * `api_token` (string): Your Cloudflare API token. **Required**. Use environment variable substitution (e.g., `"${CLOUDFLARE_API_TOKEN}"`). 73 | * `proxied_by_default` (boolean): Sets the default "Proxied" status for created A/CNAME records if not specified by a label. 
74 | * Default: `false` 75 | 76 | ### `registry` Section 77 | 78 | Configures how Sherpa-DNS tracks the DNS records it manages. 79 | 80 | * `type` (string): The type of registry to use. 81 | * Default: `"txt"` (Currently the only supported type, uses TXT records for tracking). 82 | * `txt_prefix` (string): A prefix added to the hostname when creating the corresponding TXT registry record. This helps identify Sherpa-managed TXT records and avoids conflicts (e.g., a TXT and CNAME cannot have the same name). 83 | * Default: `"sherpa-dns-"` 84 | * `txt_owner_id` (string): An identifier written into the TXT record content (`owner=...`) to distinguish records managed by different Sherpa-DNS instances (e.g., staging vs. production). 85 | * Default: `"default"` 86 | * `txt_wildcard_replacement` (string): A string used to replace the literal `*` character in a hostname when generating the TXT record's *name*. This ensures the TXT record name itself is valid DNS syntax (e.g., `*.example.com` becomes `sherpa-dns-star.example.com` if the replacement is `star`). 87 | * Default: `"star"` 88 | * `encrypt_txt` (boolean): Whether to encrypt the content of the TXT registry records. 89 | * Default: `false` 90 | * `encryption_key` (string, optional): A **secret passphrase** used to derive the encryption key if `encrypt_txt` is `true`. **Do not use the raw encryption key here.** Use environment variable substitution (e.g., `"${ENCRYPTION_KEY}"`). Required if `encrypt_txt` is `true`. 91 | 92 | ### `controller` Section 93 | 94 | Configures the main reconciliation logic. 95 | 96 | * `interval` (string): How often the controller reconciles the desired state (from Docker labels) with the actual state (from DNS provider). Uses duration format (e.g., `60s`, `1m`, `5m`). 97 | * Default: `"1m"` 98 | * `once` (boolean): If `true`, run the reconciliation loop only once and then exit. Useful for testing or specific scripting scenarios. 
99 | * Default: `false` 100 | * `dry_run` (boolean): If `true`, calculate changes but do not actually make any calls to the DNS provider API. Logs planned changes instead. 101 | * Default: `false` 102 | * `cleanup_on_stop` (boolean): If `true`, DNS records for containers that stop/disappear will be queued for deletion after a delay. If `false`, records are left behind. 103 | * Default: `true` 104 | * `cleanup_delay` (string): How long to wait after a container stops before deleting its DNS records. Uses duration format (e.g., `30s`, `15m`, `1h`). Only relevant if `cleanup_on_stop` is `true`. 105 | * Default: `"15m"` 106 | 107 | ### `domains` Section 108 | 109 | Filters which DNS zones the provider should manage. 110 | 111 | * `include` (list of strings, optional): Only manage zones matching these domain names/patterns. Patterns can include wildcards (`*`). If empty or omitted, all zones accessible by the API token are potentially managed (subject to `exclude`). 112 | * `exclude` (list of strings, optional): Explicitly exclude zones matching these domain names/patterns. Exclusions take precedence over inclusions. 113 | 114 | ### `logging` Section 115 | 116 | Configures application logging. 117 | 118 | * `level` (string): The minimum log level to output. Standard Python levels (e.g., `"debug"`, `"info"`, `"warning"`, `"error"`). 119 | * Default: `"info"` 120 | 121 | ## Usage: Docker Labels 122 | 123 | You control which DNS records Sherpa-DNS creates by adding labels to your Docker containers (either directly in `docker run` or within the `labels:` section of a `docker-compose.yml` service). 124 | 125 | ### Label Schema 126 | 127 | Use the prefix defined in `source.label_prefix` (default `sherpa.dns`). 128 | 129 | * **`sherpa.dns/hostname` (string): Required.** The fully qualified domain name (FQDN) for the DNS record (e.g., `myapp.example.com`, `*.internal.example.com`). 130 | * **`sherpa.dns/type` (string): Optional.** The type of DNS record to create. 
131 | * Values: `"A"`, `"CNAME"` 132 | * Default: `"A"` 133 | * **`sherpa.dns/target` (string): Optional.** The target/value of the DNS record. 134 | * Default for `A` records: The IP address of the container within the default Docker bridge network (or a specific network if networking is configured differently - *check source code for exact logic*). 135 | * Default for `CNAME` records: The container's name. 136 | * You can override this to point an `A` record to a specific IP or a `CNAME` to a specific target hostname. 137 | * **`sherpa.dns/ttl` (string): Optional.** The Time-To-Live for the DNS record in seconds. 138 | * Value: A number representing seconds (e.g., `"300"` for 5 minutes) OR the special value `"1"` which maps to Cloudflare's "Auto" TTL. 139 | * Default: Cloudflare's default TTL (usually "Auto" / 1). 140 | * **`sherpa.dns/proxied` (string): Optional.** Whether the DNS record should be proxied through Cloudflare (orange cloud). 141 | * Values: `"true"`, `"false"` 142 | * Default: The value of `provider.cloudflare.proxied_by_default` in `sherpa-dns.yaml` (which defaults to `false`). 
143 | 144 | ### Examples 145 | 146 | **A Record for a Web App (Auto IP, Auto TTL, Not Proxied):** 147 | 148 | ```yaml 149 | # docker-compose.yml 150 | services: 151 | my-web-app: 152 | image: nginx:latest 153 | labels: 154 | - "sherpa.dns/hostname=app.example.com" 155 | ``` 156 | 157 | **A Record with Specific IP and TTL (Proxied):** 158 | 159 | ```yaml 160 | # docker-compose.yml 161 | services: 162 | backend-service: 163 | image: nginx:latest 164 | labels: 165 | - "sherpa.dns/hostname=api.example.com" 166 | - "sherpa.dns/target=123.123.1.20" 167 | - "sherpa.dns/ttl=600" 168 | - "sherpa.dns/proxied=true" 169 | ``` 170 | 171 | **CNAME Record:** 172 | 173 | ```yaml 174 | # docker-compose.yml 175 | services: 176 | redirector: 177 | image: nginx:latest 178 | labels: 179 | - "sherpa.dns/hostname=old-app.example.com" 180 | - "sherpa.dns/type=CNAME" 181 | - "sherpa.dns/target=new-app.example.com" # Point to another DNS name 182 | - "sherpa.dns/ttl=1" # Auto TTL 183 | ``` 184 | 185 | **Wildcard Record:** 186 | 187 | ```yaml 188 | # docker-compose.yml 189 | services: 190 | wildcard-handler: 191 | image: nginx:latest 192 | labels: 193 | - "sherpa.dns/hostname=*.internal.example.com" 194 | - "sherpa.dns/target=192.168.1.100" # Target IP for the wildcard A record 195 | - "sherpa.dns/type=A" 196 | ``` 197 | 198 | ## How it Works 199 | 200 | Sherpa-DNS operates with a few key components: 201 | 202 | 1. **Source (`DockerContainerSource`):** Watches the Docker daemon for container events (start, stop, die) and periodically lists running containers. It extracts DNS configuration from container labels that match the configured prefix and filter. 203 | 2. **Registry (`TXTRegistry`):** Queries the DNS Provider (Cloudflare) for special TXT records that act as a database. It uses these TXT records (identified by `txt_prefix` and `txt_owner_id`) to determine which A/CNAME records it currently manages. 204 | 3. 
**Provider (`CloudflareProvider`):** Interacts with the Cloudflare API to list zones, list existing DNS records, create new records, update records, and delete records. 205 | 4. **Controller (`Controller`):** The central coordinator. It periodically: 206 | * Gets the *desired* state (list of `Endpoint` objects) from the Source. 207 | * Gets the *current* state (list of managed `Endpoint` objects) from the Registry. 208 | * Calculates the *changes* needed (create, update, delete) using the `Plan`. 209 | * Tells the Registry to `sync` the changes, which involves calls to the Provider API to modify A/CNAME records *and* the corresponding TXT registry records. 210 | * Manages a delayed cleanup mechanism for records associated with stopped containers. 211 | 5. **Health Server (`HealthCheckServer`):** Provides basic `/health` and `/metrics` endpoints for monitoring. 212 | 213 | ## Development 214 | 215 | If you want to contribute or run the code locally for development: 216 | 217 | 1. Clone the repository. 218 | 2. Create your `.env` and `sherpa-dns.yaml` files. 219 | 3. Use the provided `Makefile`: 220 | * `make format`: Format code using `isort`, `ruff`, `black`. 221 | * `make lint`: Check code style using `ruff` and `black`. 222 | * `make run-dev`: Build the image locally and run the container with logs attached, using `docker/docker-compose.dev.yml`. This forces a rebuild on each run. 223 | * `make stop`: Stop any running `make run-dev` or `make run` containers. 224 | * `make help`: Display available commands. 225 | 226 | ## License 227 | 228 | This project is licensed under the MIT License. See the `LICENSE` file for details. 
229 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.2.3 2 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM cgr.dev/chainguard/python:latest-dev AS builder 2 | 3 | ENV PYTHONUNBUFFERED=1 4 | ENV PYTHONDONTWRITEBYTECODE=1 5 | ENV PATH="/app/venv/bin:$PATH" 6 | 7 | WORKDIR /app 8 | 9 | # Copy and install dependencies in virtual environment 10 | COPY requirements.txt . 11 | RUN python -m venv /app/venv && pip install --no-cache-dir -r requirements.txt 12 | 13 | # --- Final Stage --- 14 | FROM cgr.dev/chainguard/python:latest 15 | 16 | ENV PYTHONUNBUFFERED=1 17 | ENV PYTHONDONTWRITEBYTECODE=1 18 | ENV PATH="/app/venv/bin:$PATH" 19 | 20 | WORKDIR /app 21 | 22 | # Copy virtual environment from builder stage 23 | COPY --from=builder /app/venv /app/venv 24 | 25 | # Copy application code and VERSION file 26 | COPY ./sherpa_dns ./sherpa_dns 27 | COPY VERSION . 28 | 29 | # Expose ports for health check and metrics 30 | EXPOSE 8080 31 | 32 | # Run the application as the entrypoint 33 | ENTRYPOINT ["python", "-m", "sherpa_dns", "/config/sherpa-dns.yaml"] 34 | -------------------------------------------------------------------------------- /docker/docker-compose.dev.yml: -------------------------------------------------------------------------------- 1 | services: 2 | docker-socket-proxy: 3 | image: 11notes/socket-proxy:2.1.4 4 | container_name: docker-socket-proxy-dev 5 | read_only: true 6 | user: "0:0" # make sure to use the same UID/GID as the owner of your docker socket! 
7 | restart: always 8 | environment: 9 | - SOCKET_PROXY_UID=65532 # nonroot, used by sherpa-dns image 10 | - SOCKET_PROXY_GID=65532 # nonroot, used by sherpa-dns image 11 | volumes: 12 | - "/run/docker.sock:/run/docker.sock:ro" 13 | - "socket-proxy:/run/proxy" 14 | 15 | sherpa-dns: 16 | build: 17 | context: .. 18 | dockerfile: docker/Dockerfile 19 | container_name: sherpa-dns-dev 20 | environment: 21 | - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN} 22 | - ENCRYPTION_KEY=${ENCRYPTION_KEY} 23 | # Enable debug logging for development 24 | - SHERPA_LOG_LEVEL=debug 25 | volumes: 26 | # Connect to the PROXY's socket via the shared volume 27 | - "socket-proxy:/var/run" 28 | - "../sherpa-dns.yaml:/config/sherpa-dns.yaml:ro" 29 | ports: 30 | - "8080:8080" # Expose health/metrics port 31 | depends_on: 32 | docker-socket-proxy: 33 | condition: service_healthy # Wait for proxy to be ready 34 | 35 | volumes: 36 | socket-proxy: 37 | -------------------------------------------------------------------------------- /docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | docker-socket-proxy: 3 | image: 11notes/socket-proxy:2.1.4 4 | container_name: docker-socket-proxy 5 | read_only: true 6 | user: "0:0" # make sure to use the same UID/GID as the owner of your docker socket! 
7 | restart: always 8 | environment: 9 | - SOCKET_PROXY_UID=65532 # nonroot, used by sherpa-dns image 10 | - SOCKET_PROXY_GID=65532 # nonroot, used by sherpa-dns image 11 | volumes: 12 | - "/run/docker.sock:/run/docker.sock:ro" 13 | - "socket-proxy:/run/proxy" 14 | 15 | sherpa-dns: 16 | image: ghcr.io/stedrow/sherpa-dns:0.2.3 17 | container_name: sherpa-dns 18 | restart: unless-stopped 19 | environment: 20 | - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN} 21 | - ENCRYPTION_KEY=${ENCRYPTION_KEY} 22 | volumes: 23 | # Connect to the PROXY's socket via the shared volume 24 | - "socket-proxy:/var/run" 25 | - "../sherpa-dns.yaml:/config/sherpa-dns.yaml:ro" 26 | ports: 27 | - "8080:8080" # Expose health/metrics port 28 | depends_on: 29 | docker-socket-proxy: 30 | condition: service_healthy # Wait for proxy to be ready 31 | 32 | volumes: 33 | socket-proxy: 34 | -------------------------------------------------------------------------------- /example_sherpa-dns.yaml: -------------------------------------------------------------------------------- 1 | source: 2 | label_prefix: "sherpa.dns" 3 | label_filter: "" 4 | 5 | provider: 6 | name: "cloudflare" 7 | cloudflare: 8 | api_token: "${CLOUDFLARE_API_TOKEN}" 9 | proxied_by_default: false 10 | 11 | registry: 12 | type: "txt" 13 | txt_prefix: "sherpa-dns-" 14 | txt_owner_id: "default" 15 | txt_wildcard_replacement: "star" 16 | encrypt: true 17 | encryption_key: "${ENCRYPTION_KEY:-}" 18 | 19 | controller: 20 | interval: "1m" 21 | once: false 22 | dry_run: false 23 | cleanup_on_stop: true 24 | cleanup_delay: "15m" 25 | 26 | domains: 27 | include: 28 | # - "internal.example.com" 29 | # exclude: 30 | # - "internal.example.com" 31 | 32 | logging: 33 | level: "info" 34 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | # Enable Flake8 rules 3 | select = ["E", "F"] 4 | 5 | # Ignore 
specific rules 6 | ignore = ["E501"] # Line too long 7 | 8 | # Line length 9 | line-length = 100 10 | 11 | # Assume Python 3.12 12 | target-version = "py312" 13 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:recommended" 5 | ], 6 | "dependencyDashboard": true, 7 | "commitMessagePrefix": "chore(deps): ", 8 | "labels": ["dependencies"], 9 | "prConcurrentLimit": 5, 10 | "prHourlyLimit": 2, 11 | "packageRules": [ 12 | { 13 | "description": "Group GitHub Actions updates", 14 | "matchManagers": ["github-actions"], 15 | "groupName": "github actions" 16 | }, 17 | { 18 | "description": "Group Dockerfile dependencies (base image, tools)", 19 | "matchManagers": ["dockerfile"], 20 | "groupName": "docker dependencies" 21 | }, 22 | { 23 | "description": "Group Python requirements updates", 24 | "matchManagers": ["pip_requirements"], 25 | "groupName": "python requirements" 26 | }, 27 | { 28 | "description": "Exclude the main application image in docker-compose.yml (updated by workflow)", 29 | "matchFileNames": ["docker/docker-compose.yml"], 30 | "matchPackageNames": ["ghcr.io/stedrow/sherpa-dns"], 31 | "enabled": false 32 | } 33 | ] 34 | } 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | docker==7.1.0 2 | pydantic==2.11.7 3 | PyYAML==6.0.2 4 | CloudFlare==4.3.1 5 | cryptography==45.0.6 6 | -------------------------------------------------------------------------------- /sherpa_dns/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sherpa-DNS: A Python application to create and manage DNS records for Docker Compose services. 
3 | """ 4 | 5 | __version__ = "0.1.0" 6 | -------------------------------------------------------------------------------- /sherpa_dns/__main__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main entry point for Sherpa-DNS. 3 | """ 4 | 5 | import asyncio 6 | import logging 7 | import sys 8 | from pathlib import Path 9 | 10 | from sherpa_dns.config.config import Config 11 | from sherpa_dns.controller.controller import Controller 12 | from sherpa_dns.provider.cloudflare import CloudflareProvider 13 | from sherpa_dns.registry.txt_registry import TXTRegistry 14 | from sherpa_dns.source.docker_container import DockerContainerSource 15 | from sherpa_dns.utils.health import HealthCheckServer 16 | 17 | # Define the path to the version file within the container 18 | VERSION_FILE_PATH = Path("/app/VERSION") 19 | 20 | 21 | async def main(): 22 | """Main entry point running all components concurrently.""" 23 | app_version = "unknown" 24 | try: 25 | if VERSION_FILE_PATH.is_file(): 26 | app_version = VERSION_FILE_PATH.read_text().strip() 27 | except Exception as e: 28 | logging.warning(f"Could not read version file {VERSION_FILE_PATH}: {e}") 29 | 30 | # Setup logging 31 | logging.basicConfig( 32 | level=logging.INFO, 33 | format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", 34 | datefmt="%H:%M:%S", 35 | handlers=[logging.StreamHandler(sys.stdout)], 36 | ) 37 | logger = logging.getLogger("sherpa-dns") 38 | logger.info(f"Starting Sherpa-DNS v{app_version}") 39 | 40 | # Load configuration 41 | config_path = Path(sys.argv[1]) if len(sys.argv) > 1 else None 42 | config = Config.from_yaml(config_path) 43 | 44 | # Set log level from configuration 45 | log_level = getattr(logging, config.log_level.upper(), logging.INFO) 46 | logging.getLogger().setLevel(log_level) 47 | # Set httpx logger level to WARNING unless root is DEBUG 48 | httpx_log_level = logging.DEBUG if log_level == logging.DEBUG else logging.WARNING 49 | 
logging.getLogger("httpx").setLevel(httpx_log_level) 50 | 51 | # Initialize components 52 | source = DockerContainerSource(config.label_prefix, config.label_filter) 53 | provider = CloudflareProvider( 54 | config.cloudflare_api_token, 55 | domain_filter=config.domain_filter, 56 | exclude_domains=config.exclude_domains, 57 | proxied_by_default=config.cloudflare_proxied_by_default, 58 | dry_run=config.dry_run, 59 | ) 60 | registry = TXTRegistry( 61 | provider, 62 | txt_prefix=config.txt_prefix, 63 | txt_owner_id=config.txt_owner_id, 64 | txt_wildcard_replacement=config.txt_wildcard_replacement, 65 | encrypt_txt=config.encrypt_txt, 66 | encryption_key=config.encryption_key, 67 | ) 68 | controller = Controller( 69 | source, 70 | registry, 71 | provider, 72 | interval=config.interval, 73 | cleanup_delay=config.cleanup_delay, 74 | cleanup_on_stop=config.cleanup_on_stop, 75 | ) 76 | 77 | # Start health check server 78 | health_server = HealthCheckServer() 79 | health_server.start() 80 | 81 | try: 82 | # Run tasks concurrently 83 | if config.once: 84 | # Run once and exit 85 | await controller.run_once() 86 | else: 87 | logger.debug( 88 | "Starting background tasks: Reconciliation Loop, Source Event Listener, Controller Event Watcher, Cleanup Tracker" 89 | ) 90 | # Run continuously 91 | await asyncio.gather( 92 | controller.run_reconciliation_loop(), 93 | source.watch_events(), 94 | controller.watch_events(), 95 | controller.run_cleanup_tracker(), 96 | ) 97 | finally: 98 | # Stop health check server 99 | health_server.stop() 100 | 101 | 102 | if __name__ == "__main__": 103 | try: 104 | asyncio.run(main()) 105 | except KeyboardInterrupt: 106 | print("\nShutting down Sherpa-DNS") 107 | sys.exit(0) 108 | -------------------------------------------------------------------------------- /sherpa_dns/config/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Configuration module for Sherpa-DNS. 
3 | """ 4 | 5 | import os 6 | import re 7 | from pathlib import Path 8 | from typing import List, Optional, Union 9 | 10 | import yaml 11 | from pydantic import BaseModel, Field 12 | 13 | 14 | class Config(BaseModel): 15 | """Configuration for Sherpa-DNS.""" 16 | 17 | # Source configuration 18 | label_prefix: str = "sherpa.dns" 19 | label_filter: str = "" 20 | 21 | # Provider configuration 22 | provider: str = "cloudflare" 23 | cloudflare_api_token: str = "" 24 | cloudflare_proxied_by_default: bool = False 25 | 26 | # Registry configuration 27 | registry: str = "txt" 28 | txt_prefix: str = "sherpa-dns-" 29 | txt_owner_id: str = "default" 30 | txt_wildcard_replacement: str = "star" 31 | encrypt_txt: bool = False 32 | encryption_key: Optional[str] = None 33 | 34 | # Controller configuration 35 | interval: str = "1m" 36 | once: bool = False 37 | dry_run: bool = False 38 | cleanup_on_stop: bool = True 39 | cleanup_delay: str = "15m" 40 | 41 | # Domain filtering 42 | domain_filter: List[str] = Field(default_factory=list) 43 | exclude_domains: List[str] = Field(default_factory=list) 44 | 45 | # Logging configuration 46 | log_level: str = "info" 47 | 48 | @classmethod 49 | def from_yaml(cls, config_path: Optional[Union[str, Path]] = None) -> "Config": 50 | """ 51 | Load configuration from a YAML file. 
52 | 53 | Args: 54 | config_path: Path to the YAML configuration file 55 | 56 | Returns: 57 | Config: Config instance populated with values from the YAML file 58 | """ 59 | # Default configuration paths to check 60 | default_paths = [ 61 | Path("./sherpa-dns.yaml"), 62 | Path("./sherpa-dns.yml"), 63 | Path("/etc/sherpa-dns/sherpa-dns.yaml"), 64 | Path("/etc/sherpa-dns/config.yaml"), 65 | ] 66 | 67 | # If config_path is provided, use it 68 | if config_path: 69 | paths = [Path(config_path)] 70 | else: 71 | paths = default_paths 72 | 73 | # Try to load configuration from the first existing path 74 | config_data = {} 75 | for path in paths: 76 | if path.exists(): 77 | with open(path, "r") as f: 78 | yaml_content = f.read() 79 | # Substitute environment variables 80 | yaml_content = cls._substitute_env_vars(yaml_content) 81 | config_data = yaml.safe_load(yaml_content) 82 | break 83 | 84 | # Flatten nested configuration 85 | flat_config = cls._flatten_config(config_data) 86 | 87 | # Create and return Config instance 88 | return cls(**flat_config) 89 | 90 | @staticmethod 91 | def _substitute_env_vars(content: str) -> str: 92 | """ 93 | Substitute environment variables in the configuration content. 94 | 95 | Args: 96 | content: Configuration content 97 | 98 | Returns: 99 | str: Configuration content with environment variables substituted 100 | """ 101 | # Pattern for ${ENV_VAR} or ${ENV_VAR:-default} 102 | pattern = r"\${([^}]+)}" 103 | 104 | def replace_env_var(match): 105 | env_var = match.group(1) 106 | if ":-" in env_var: 107 | env_var, default = env_var.split(":-", 1) 108 | return os.environ.get(env_var, default) 109 | return os.environ.get(env_var, "") 110 | 111 | return re.sub(pattern, replace_env_var, content) 112 | 113 | @staticmethod 114 | def _flatten_config(config_data: dict) -> dict: 115 | """ 116 | Flatten nested configuration. 
117 | 118 | Args: 119 | config_data: Nested configuration data 120 | 121 | Returns: 122 | dict: Flattened configuration data 123 | """ 124 | flat_config = {} 125 | 126 | # Source configuration 127 | source = config_data.get("source", {}) 128 | flat_config["label_prefix"] = source.get("label_prefix", "sherpa.dns") 129 | flat_config["label_filter"] = source.get("label_filter", "") 130 | 131 | # Provider configuration 132 | provider = config_data.get("provider", {}) 133 | flat_config["provider"] = provider.get("name", "cloudflare") 134 | 135 | # Cloudflare provider configuration 136 | cloudflare = provider.get("cloudflare", {}) 137 | flat_config["cloudflare_api_token"] = cloudflare.get("api_token", "") 138 | flat_config["cloudflare_proxied_by_default"] = cloudflare.get( 139 | "proxied_by_default", False 140 | ) 141 | 142 | # Registry configuration 143 | registry = config_data.get("registry", {}) 144 | flat_config["registry"] = registry.get("type", "txt") 145 | flat_config["txt_prefix"] = registry.get("txt_prefix", "sherpa-dns-") 146 | flat_config["txt_owner_id"] = registry.get("txt_owner_id", "default") 147 | flat_config["txt_wildcard_replacement"] = registry.get( 148 | "txt_wildcard_replacement", "star" 149 | ) 150 | flat_config["encrypt_txt"] = registry.get("encrypt", False) 151 | flat_config["encryption_key"] = registry.get("encryption_key", None) 152 | 153 | # Controller configuration 154 | controller = config_data.get("controller", {}) 155 | flat_config["interval"] = controller.get("interval", "1m") 156 | flat_config["once"] = controller.get("once", False) 157 | flat_config["dry_run"] = controller.get("dry_run", False) 158 | flat_config["cleanup_on_stop"] = controller.get("cleanup_on_stop", True) 159 | flat_config["cleanup_delay"] = controller.get("cleanup_delay", "15m") 160 | 161 | # Domain filtering 162 | domains = config_data.get("domains", {}) 163 | flat_config["domain_filter"] = domains.get("include", []) 164 | flat_config["exclude_domains"] = 
domains.get("exclude", []) 165 | 166 | # Logging configuration 167 | logging = config_data.get("logging", {}) 168 | flat_config["log_level"] = logging.get("level", "info") 169 | 170 | return flat_config 171 | 172 | def parse_duration(self, duration_str: str) -> int: 173 | """ 174 | Parse a duration string like '15m' into seconds. 175 | 176 | Args: 177 | duration_str: Duration string 178 | 179 | Returns: 180 | int: Duration in seconds 181 | """ 182 | if not duration_str: 183 | return 60 # Default to 1 minute 184 | 185 | # Pattern for duration string (e.g., 15m, 1h, 30s) 186 | pattern = r"^(\d+)([smhd])$" 187 | match = re.match(pattern, duration_str) 188 | 189 | if not match: 190 | return 60 # Default to 1 minute 191 | 192 | value, unit = match.groups() 193 | value = int(value) 194 | 195 | # Convert to seconds 196 | if unit == "s": 197 | return value 198 | elif unit == "m": 199 | return value * 60 200 | elif unit == "h": 201 | return value * 60 * 60 202 | elif unit == "d": 203 | return value * 60 * 60 * 24 204 | 205 | return 60 # Default to 1 minute 206 | -------------------------------------------------------------------------------- /sherpa_dns/controller/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stedrow/sherpa-dns/3f6c987a2d5fcaccfb5371f42e60e3c2c522ae41/sherpa_dns/controller/__init__.py -------------------------------------------------------------------------------- /sherpa_dns/controller/controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | Controller module for Sherpa-DNS. 3 | 4 | This module is responsible for coordinating between the source, registry, and provider 5 | components to ensure that the desired state is maintained. 
6 | """ 7 | 8 | import asyncio 9 | import logging 10 | from typing import Optional 11 | 12 | from sherpa_dns.controller.plan import Plan 13 | from sherpa_dns.utils.cleanup_tracker import CleanupTracker 14 | 15 | 16 | class Controller: 17 | """ 18 | Controller that coordinates between the source, registry, and provider components. 19 | """ 20 | 21 | def __init__( 22 | self, 23 | source, 24 | registry, 25 | provider, 26 | interval: str = "1m", 27 | cleanup_delay: str = "15m", 28 | cleanup_on_stop: bool = True, 29 | ): 30 | """ 31 | Initialize a Controller. 32 | 33 | Args: 34 | source: Source component 35 | registry: Registry component 36 | provider: Provider component 37 | interval: Reconciliation interval 38 | cleanup_delay: Delay before cleaning up DNS records for stopped containers 39 | cleanup_on_stop: Whether to clean up DNS records for stopped containers 40 | """ 41 | self.source = source 42 | self.registry = registry 43 | self.provider = provider 44 | self.interval = self._parse_interval(interval) 45 | self.cleanup_on_stop = cleanup_on_stop 46 | self.cleanup_tracker = CleanupTracker(cleanup_delay) 47 | self.logger = logging.getLogger("sherpa-dns.controller") 48 | self._event_triggered_reconcile_task: Optional[ 49 | asyncio.Task 50 | ] = None # Task for debouncing 51 | self._debounce_delay = 2 # Seconds to wait after an event before reconciling 52 | 53 | async def run_reconciliation_loop(self) -> None: 54 | """ 55 | Runs the controller's reconciliation loop at the specified interval. 56 | """ 57 | # Log at DEBUG as the main task start is logged in __main__ 58 | self.logger.debug( 59 | f"Reconciliation loop starting with interval {self.interval} seconds" 60 | ) 61 | 62 | while True: 63 | try: 64 | await self.run_once() 65 | except Exception as e: 66 | self.logger.error(f"Error in reconciliation loop: {e}") 67 | 68 | await asyncio.sleep(self.interval) 69 | 70 | async def run_once(self) -> None: 71 | """ 72 | Performs a single reconciliation run. 
73 | """ 74 | 75 | try: 76 | # Get desired endpoints from source 77 | desired_endpoints = await self.source.endpoints() 78 | 79 | # Get current endpoints from registry 80 | current_endpoints = await self.registry.records() 81 | 82 | # Calculate plan first to know if changes are pending 83 | plan = Plan( 84 | current_endpoints, desired_endpoints, policy="sync" 85 | ).calculate_changes() 86 | potential_deletes = {ep.id: ep for ep in plan.delete} 87 | # Base pending changes ONLY on creates/updates, as deletes are handled by tracker 88 | has_pending_changes = bool(plan.create or plan.update_old) 89 | 90 | # Log summary - use DEBUG level if system is stable (no immediate changes) 91 | log_level = logging.DEBUG if not has_pending_changes else logging.INFO 92 | 93 | self.logger.log( 94 | log_level, 95 | f"Running reconciliation: Found {len(desired_endpoints)} desired and " 96 | f"{len(current_endpoints)} current endpoints.", 97 | ) 98 | 99 | if potential_deletes and self.cleanup_on_stop: 100 | for endpoint_id, endpoint in potential_deletes.items(): 101 | # Mark these endpoints for future deletion using the tracker 102 | self.cleanup_tracker.mark_for_deletion(endpoint_id) 103 | self.logger.debug( 104 | f"Ensured endpoint {endpoint_id} ({endpoint.dnsname}) is marked for delayed cleanup." 105 | ) 106 | 107 | # IMPORTANT: Clear the delete list from the plan that gets synced immediately 108 | plan.delete = [] 109 | elif potential_deletes: # cleanup_on_stop is False 110 | self.logger.info( 111 | f"Identified {len(potential_deletes)} endpoints not in desired state, but cleanup_on_stop=False. Ignoring." 
112 | ) 113 | # Clear deletes from the plan as we don't delete on stop 114 | plan.delete = [] 115 | 116 | # ---- End Deletion Handling Modification ---- 117 | 118 | # Apply ONLY creates and updates now 119 | # plan.delete is now always empty here 120 | if plan.has_changes(): 121 | # Adjust log message as plan.delete is now empty 122 | self.logger.info( 123 | f"Applying changes: {len(plan.create)} creates, {len(plan.update_old)} updates" 124 | ) 125 | await self.registry.sync(plan) # Syncs only creates/updates 126 | else: 127 | self.logger.debug("No immediate changes (creates/updates) to apply") 128 | 129 | # Process cleanup tracker (this handles the actual deletions after delay) 130 | await self.process_cleanup() 131 | except Exception as e: 132 | self.logger.error( 133 | f"Error in reconciliation: {e}", exc_info=True 134 | ) # Add exc_info for better debugging 135 | 136 | async def process_cleanup(self) -> None: 137 | """ 138 | Process the cleanup tracker to handle delayed deletions. 139 | """ 140 | if not self.cleanup_on_stop: 141 | self.logger.debug("Cleanup on stop is disabled, skipping cleanup process.") 142 | return 143 | 144 | # Log status of pending deletions 145 | try: 146 | pending_status = self.cleanup_tracker.get_pending_status() 147 | if pending_status: 148 | self.logger.debug( 149 | f"Checking status of {len(pending_status)} endpoints pending deletion:" 150 | ) 151 | for endpoint_id, remaining_time in pending_status.items(): 152 | if remaining_time > 0: 153 | self.logger.debug( 154 | f" - Endpoint ID {endpoint_id} will be eligible for deletion in {remaining_time:.1f} seconds." 155 | ) 156 | else: 157 | # This case should be rare as get_eligible_for_deletion usually handles it first 158 | self.logger.debug( 159 | f" - Endpoint ID {endpoint_id} is overdue for deletion by {-remaining_time:.1f} seconds." 
160 | ) 161 | else: 162 | self.logger.debug("No endpoints currently pending deletion.") 163 | except Exception as e: 164 | self.logger.error( 165 | f"Error getting pending cleanup status: {e}", exc_info=True 166 | ) 167 | # Continue with eligibility check anyway 168 | 169 | # Get endpoints eligible for deletion 170 | try: 171 | eligible_ids = self.cleanup_tracker.get_eligible_for_deletion() 172 | 173 | if not eligible_ids: 174 | self.logger.debug("No endpoints eligible for deletion currently.") 175 | return 176 | 177 | self.logger.debug( 178 | f"Found {len(eligible_ids)} endpoints eligible for deletion by tracker." 179 | ) 180 | 181 | # Get current endpoints - needed to construct deletion plan 182 | current_endpoints = await self.registry.records() 183 | current_endpoints_map = {ep.id: ep for ep in current_endpoints} 184 | 185 | # Filter endpoints eligible for deletion that still exist 186 | endpoints_to_delete = [] 187 | for endpoint_id in eligible_ids: 188 | if endpoint_id in current_endpoints_map: 189 | endpoints_to_delete.append(current_endpoints_map[endpoint_id]) 190 | else: 191 | self.logger.warning( 192 | f"Endpoint ID {endpoint_id} marked for deletion, but not found in current records. Already deleted?" 193 | ) 194 | 195 | if endpoints_to_delete: 196 | # Create a deletion plan 197 | deletion_plan = Plan.deletion_only(endpoints_to_delete) 198 | 199 | # Apply changes 200 | self.logger.debug( 201 | f"Applying deletion plan for {len(endpoints_to_delete)} endpoints after cleanup delay" 202 | ) 203 | await self.registry.sync(deletion_plan) 204 | else: 205 | self.logger.debug( 206 | "No existing endpoints matched the eligible IDs for deletion." 207 | ) 208 | 209 | except Exception as e: 210 | self.logger.error(f"Error during cleanup processing: {e}", exc_info=True) 211 | 212 | async def process_event(self, event: dict) -> None: 213 | """ 214 | Process a Docker event and trigger reconciliation if needed. 
215 | Relies on run_once() to handle marking/unmarking based on state diff, 216 | but attempts to unmark quickly on 'start' events. 217 | 218 | Args: 219 | event: Docker event 220 | """ 221 | event_type = event.get("status") 222 | container_id = event.get("id") 223 | container_short_id = container_id[:12] if container_id else "N/A" 224 | 225 | if not container_id: 226 | self.logger.warning("Received event with no container ID.") 227 | return 228 | 229 | if event_type in ["die", "stop", "kill"]: 230 | # Container stopped. run_once() will detect it missing and mark via cleanup_tracker. 231 | # Log at DEBUG level to reduce noise from multiple stop-related events 232 | self.logger.debug( 233 | f"Container {container_short_id} stopped event ({event_type}) received." 234 | ) 235 | elif event_type == "start": 236 | # Container started. run_once() will ensure it's created/updated. 237 | # Log at DEBUG level, actual reconciliation scheduling logged later at INFO 238 | self.logger.debug(f"Container {container_short_id} started event received.") 239 | try: 240 | # Fetch desired state to find the endpoints associated with this container 241 | desired_endpoints = await self.source.endpoints() # Fetch fresh state 242 | container_endpoints = [ 243 | endpoint 244 | for endpoint in desired_endpoints 245 | if endpoint.container_id == container_id 246 | ] 247 | if container_endpoints: 248 | self.logger.debug( 249 | f"Unmarking {len(container_endpoints)} endpoints for started container {container_short_id}" 250 | ) 251 | for endpoint in container_endpoints: 252 | # Tell tracker this endpoint is active again 253 | self.cleanup_tracker.unmark_for_deletion(endpoint.id) 254 | else: 255 | self.logger.debug( 256 | f"No desired endpoints found for started container {container_short_id} to unmark (might lack labels?)." 
257 | ) 258 | except Exception as e: 259 | self.logger.error( 260 | f"Error fetching desired endpoints during start event processing for {container_short_id}: {e}", 261 | exc_info=True, 262 | ) 263 | else: 264 | self.logger.debug( 265 | f"Ignoring Docker event type '{event_type}' for container {container_short_id}" 266 | ) 267 | return # Don't reconcile on ignored events 268 | 269 | # --- Debounce Reconciliation Trigger --- 270 | if ( 271 | self._event_triggered_reconcile_task 272 | and not self._event_triggered_reconcile_task.done() 273 | ): 274 | self.logger.debug( 275 | f"Reconciliation already scheduled/running due to a recent event. " 276 | f"Debouncing trigger from event '{event_type}' for container {container_short_id}." 277 | ) 278 | return 279 | 280 | # Schedule the debounced reconciliation 281 | # This log remains INFO as it indicates a reconciliation WILL be scheduled 282 | self.logger.info( 283 | f"Scheduling reconciliation due to event '{event_type}' for container {container_short_id} (after {self._debounce_delay}s delay)" 284 | ) 285 | self._event_triggered_reconcile_task = asyncio.create_task( 286 | self._run_once_debounced() 287 | ) 288 | # Add a callback to clear the task variable once it's done (optional, helps cleanup) 289 | # self._event_triggered_reconcile_task.add_done_callback(lambda _: setattr(self, '_event_triggered_reconcile_task', None)) 290 | 291 | # Note: We no longer call self.run_once() directly here. 292 | # await self.run_once() 293 | 294 | async def _run_once_debounced(self): 295 | """Runs run_once after a short delay to allow debouncing. 296 | Clears the tracking task variable upon completion. 297 | """ 298 | try: 299 | await asyncio.sleep(self._debounce_delay) 300 | self.logger.debug( 301 | "Running event-triggered reconciliation after debounce delay." 
302 | ) 303 | await self.run_once() 304 | except asyncio.CancelledError: 305 | self.logger.debug("Debounced reconciliation task cancelled.") 306 | except Exception as e: 307 | self.logger.error( 308 | f"Error during debounced reconciliation run: {e}", exc_info=True 309 | ) 310 | # Task is finished, future checks in process_event will see it as done. 311 | 312 | async def run_cleanup_tracker(self) -> None: 313 | """ 314 | Run the cleanup tracker's background processing. 315 | """ 316 | self.logger.debug("Cleanup tracker task started.") 317 | 318 | while True: 319 | try: 320 | await self.process_cleanup() 321 | except Exception as e: 322 | self.logger.error(f"Error in cleanup tracker: {e}") 323 | 324 | await asyncio.sleep(60) # Check every minute 325 | 326 | async def watch_events(self) -> None: 327 | """ 328 | Watch for Docker events and process them. 329 | """ 330 | self.logger.debug("Controller event watcher task started.") 331 | 332 | while True: 333 | try: 334 | # Get event from queue 335 | event = await self.source.event_queue.get() 336 | 337 | # Process event 338 | await self.process_event(event) 339 | 340 | # Mark task as done 341 | self.source.event_queue.task_done() 342 | except Exception as e: 343 | self.logger.error(f"Error processing event: {e}") 344 | 345 | @staticmethod 346 | def _parse_interval(interval: str) -> int: 347 | """ 348 | Parse an interval string like '1m' into seconds. 
349 | 350 | Args: 351 | interval: Interval string 352 | 353 | Returns: 354 | int: Interval in seconds 355 | """ 356 | if not interval: 357 | return 60 # Default to 1 minute 358 | 359 | # Pattern for interval string (e.g., 1m, 5s, 1h) 360 | if interval.endswith("s"): 361 | return int(interval[:-1]) 362 | elif interval.endswith("m"): 363 | return int(interval[:-1]) * 60 364 | elif interval.endswith("h"): 365 | return int(interval[:-1]) * 60 * 60 366 | elif interval.endswith("d"): 367 | return int(interval[:-1]) * 60 * 60 * 24 368 | 369 | # If no unit is specified, assume seconds 370 | try: 371 | return int(interval) 372 | except ValueError: 373 | return 60 # Default to 1 minute 374 | -------------------------------------------------------------------------------- /sherpa_dns/controller/plan.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plan module for Sherpa-DNS. 3 | 4 | This module is responsible for calculating the changes needed to bring the current state 5 | in line with the desired state. 6 | """ 7 | 8 | import logging 9 | from typing import Dict, List 10 | 11 | from sherpa_dns.models.models import Changes, Endpoint 12 | 13 | 14 | class Plan: 15 | """ 16 | Plan calculates the changes needed to bring the current state in line with the desired state. 17 | """ 18 | 19 | def __init__( 20 | self, current: List[Endpoint], desired: List[Endpoint], policy: str = "sync" 21 | ): 22 | """ 23 | Initialize a Plan. 24 | 25 | Args: 26 | current: Current endpoints 27 | desired: Desired endpoints 28 | policy: Synchronization policy (sync, upsert-only, create-only) 29 | """ 30 | self.current = current 31 | self.desired = desired 32 | self.policy = policy 33 | self.logger = logging.getLogger("sherpa-dns.plan") 34 | 35 | def calculate_changes(self) -> Changes: 36 | """ 37 | Calculate the changes needed to bring the current state in line with the desired state. 
38 | 39 | Returns: 40 | Changes: Changes to be applied 41 | """ 42 | changes = Changes() 43 | 44 | # Index current endpoints by ID for faster lookup 45 | current_by_id: Dict[str, Endpoint] = { 46 | endpoint.id: endpoint for endpoint in self.current 47 | } 48 | 49 | # Process desired endpoints 50 | for desired_endpoint in self.desired: 51 | current_endpoint = current_by_id.get(desired_endpoint.id) 52 | 53 | if current_endpoint: 54 | # Endpoint exists, check if it needs to be updated 55 | if self._needs_update(current_endpoint, desired_endpoint): 56 | self.logger.info(f"Endpoint {desired_endpoint.id} needs update") 57 | changes.update_old.append(current_endpoint) 58 | changes.update_new.append(desired_endpoint) 59 | else: 60 | self.logger.debug(f"Endpoint {desired_endpoint.id} is up-to-date") 61 | else: 62 | # Endpoint doesn't exist, create it 63 | self.logger.info(f"Endpoint {desired_endpoint.id} will be created") 64 | changes.create.append(desired_endpoint) 65 | 66 | # Process current endpoints that are not in desired endpoints 67 | if self.policy == "sync": 68 | desired_ids = {endpoint.id for endpoint in self.desired} 69 | for current_endpoint in self.current: 70 | if current_endpoint.id not in desired_ids: 71 | self.logger.debug( 72 | f"Endpoint {current_endpoint.id} ({current_endpoint.dnsname}) identified as no longer desired." 73 | ) 74 | changes.delete.append(current_endpoint) 75 | 76 | return changes 77 | 78 | @staticmethod 79 | def _needs_update(current: Endpoint, desired: Endpoint) -> bool: 80 | """ 81 | Check if an endpoint needs to be updated. 
"""
Data models for Sherpa-DNS.
"""

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Endpoint:
    """
    Represents a DNS endpoint (record) to be managed by Sherpa-DNS.
    """

    # Fully-qualified record name, e.g. "app.example.com".
    dnsname: str
    # Record contents (e.g. IP addresses for an A record).
    targets: List[str]
    # DNS record type, e.g. "A", "CNAME", "TXT".
    record_type: str
    # TTL in seconds; None lets the provider decide (1 means "Auto" on Cloudflare).
    record_ttl: Optional[int] = None
    # Whether the record is proxied (Cloudflare orange-cloud).
    proxied: bool = False
    # Originating Docker container, when known.
    container_id: Optional[str] = None
    container_name: Optional[str] = None

    @property
    def id(self) -> str:
        """
        Generate a unique identifier for this endpoint.

        Returns:
            str: Unique identifier in the form "<dnsname>:<record_type>"
        """
        return f"{self.dnsname}:{self.record_type}"


@dataclass
class Changes:
    """
    Represents changes to be applied to DNS records.

    update_old and update_new are parallel lists: update_old[i] is the
    current state of the record that update_new[i] replaces.
    """

    create: List[Endpoint] = field(default_factory=list)
    update_old: List[Endpoint] = field(default_factory=list)
    update_new: List[Endpoint] = field(default_factory=list)
    delete: List[Endpoint] = field(default_factory=list)

    def has_changes(self) -> bool:
        """
        Check if there are any changes to be applied.

        Returns:
            bool: True if there are changes, False otherwise
        """
        # Include update_new so a Changes object populated only on the "new"
        # side of an update is still reported as having changes
        # (update_old/update_new are normally populated together, but the
        # previous check silently ignored update_new).
        return bool(
            self.create or self.update_old or self.update_new or self.delete
        )
31 | 32 | Args: 33 | api_token: Cloudflare API token 34 | domain_filter: List of domains to include 35 | exclude_domains: List of domains to exclude 36 | proxied_by_default: Whether to proxy records by default 37 | dry_run: Whether to run in dry-run mode 38 | """ 39 | self.api_token = api_token 40 | self.domain_filter = domain_filter or [] 41 | self.exclude_domains = exclude_domains or [] 42 | self.proxied_by_default = proxied_by_default 43 | self.dry_run = dry_run 44 | self.logger = logging.getLogger("sherpa-dns.provider.cloudflare") 45 | 46 | # Initialize Cloudflare client - use lowercase class name and api_token argument 47 | self.cf = cloudflare.Cloudflare(api_token=api_token) 48 | 49 | # Cache for zone IDs 50 | self.zone_id_cache: Dict[str, str] = {} 51 | 52 | async def zones(self) -> List[Dict[str, str]]: 53 | """ 54 | Returns a list of managed zones that match the domain filter. 55 | 56 | Returns: 57 | List[Dict[str, str]]: List of zones 58 | """ 59 | self.logger.debug("Attempting to fetch zones from Cloudflare API...") 60 | try: 61 | # Get all zones using list method and direct keyword arguments 62 | zones_iterator = self.cf.zones.list(per_page=100) 63 | zones = list(zones_iterator) 64 | 65 | self.logger.debug(f"Received {len(zones)} raw zones from Cloudflare API.") 66 | if not zones: 67 | self.logger.warning("Cloudflare API returned an empty list of zones.") 68 | else: 69 | # Log the names of received zones using attribute access 70 | zone_names = [getattr(zone, "name", "N/A") for zone in zones] 71 | self.logger.debug(f"Raw zone names received: {zone_names}") 72 | 73 | # Filter zones 74 | filtered_zones = [] 75 | self.logger.debug( 76 | f"Filtering zones based on domain_filter: {self.domain_filter} and exclude_domains: {self.exclude_domains}" 77 | ) 78 | for zone in zones: 79 | # Use attribute access 80 | zone_name = getattr(zone, "name", None) 81 | zone_id = getattr(zone, "id", None) 82 | if not zone_name or not zone_id: 83 | self.logger.warning( 84 | 
f"Skipping zone object due to missing name or id: {zone}" 85 | ) 86 | continue 87 | 88 | self.logger.debug( 89 | f"Processing zone: Name='{zone_name}', ID='{zone_id}'" 90 | ) 91 | 92 | # Check if zone should be excluded 93 | if self._matches_domain_filter(zone_name, self.exclude_domains): 94 | self.logger.debug( 95 | f"Zone '{zone_name}' is in exclude_domains, skipping." 96 | ) 97 | continue 98 | 99 | # Check if zone is in include filter (if filter is defined) 100 | if self.domain_filter and not self._matches_domain_filter( 101 | zone_name, self.domain_filter 102 | ): 103 | self.logger.debug( 104 | f"Zone '{zone_name}' is not in domain_filter, skipping." 105 | ) 106 | continue 107 | 108 | self.logger.debug( 109 | f"Zone '{zone_name}' passed filters. Adding to managed zones." 110 | ) 111 | # Store as dict for consistency with downstream usage (might need refactor later) 112 | filtered_zones.append({"id": zone_id, "name": zone_name}) 113 | 114 | # Cache zone ID 115 | self.zone_id_cache[zone_name] = zone_id 116 | 117 | self.logger.debug( 118 | f"Finished filtering. Found {len(filtered_zones)} managed zones." 119 | ) 120 | return filtered_zones 121 | # Use cloudflare.APIError for specific API errors 122 | except cloudflare.APIError as e: 123 | error_code = getattr(e, "code", "N/A") 124 | error_message = getattr(e, "message", str(e)) 125 | self.logger.error( 126 | f"Cloudflare API Error fetching zones: {e} (Code: {error_code}, Message: {error_message})" 127 | ) 128 | return [] 129 | # Catch other potential Cloudflare errors 130 | except cloudflare.CloudflareError as e: 131 | self.logger.error(f"General Cloudflare Error fetching zones: {e}") 132 | return [] 133 | except Exception as e: 134 | self.logger.exception( 135 | f"An unexpected error occurred while fetching zones: {e}" 136 | ) 137 | return [] 138 | 139 | async def records(self) -> List[Endpoint]: 140 | """ 141 | Returns a list of all DNS records in managed zones. 
142 | 143 | Returns: 144 | List[Endpoint]: List of endpoints 145 | """ 146 | endpoints = [] 147 | 148 | # Get managed zones 149 | zones = await self.zones() 150 | 151 | for zone in zones: 152 | # These come from the dict created in zones() method 153 | zone_id = zone["id"] 154 | zone_name = zone["name"] 155 | 156 | try: 157 | # Get all DNS records for the zone using client.dns.records 158 | dns_records_iterator = self.cf.dns.records.list( 159 | zone_id=zone_id, per_page=100 160 | ) 161 | dns_records = list(dns_records_iterator) 162 | 163 | for record in dns_records: 164 | # Use attribute access for record object 165 | record_type = getattr(record, "type", None) 166 | record_name = getattr(record, "name", None) 167 | record_content = getattr(record, "content", None) 168 | record_ttl = getattr( 169 | record, "ttl", None 170 | ) # Default handled by Endpoint 171 | record_proxied = getattr( 172 | record, "proxied", False 173 | ) # Default handled by Endpoint 174 | 175 | if not all([record_type, record_name, record_content]): 176 | self.logger.warning( 177 | f"Skipping record object due to missing type, name, or content: {record}" 178 | ) 179 | continue 180 | 181 | # Skip TXT records (they are managed by the registry) 182 | if record_type == "TXT": 183 | continue 184 | 185 | # Create endpoint 186 | endpoint = Endpoint( 187 | dnsname=record_name, 188 | targets=[record_content], # Assuming single content for non-TXT 189 | record_type=record_type, 190 | record_ttl=record_ttl, 191 | proxied=record_proxied, 192 | ) 193 | endpoints.append(endpoint) 194 | # Use cloudflare.APIError for specific API errors 195 | except cloudflare.APIError as e: 196 | error_code = getattr(e, "code", "N/A") 197 | error_message = getattr(e, "message", str(e)) 198 | self.logger.error( 199 | f"Cloudflare API Error fetching records for zone {zone_name}: {e} (Code: {error_code}, Message: {error_message})" 200 | ) 201 | # Catch other potential Cloudflare errors 202 | except cloudflare.CloudflareError as 
e: 203 | self.logger.error( 204 | f"General Cloudflare Error fetching records for zone {zone_name}: {e}" 205 | ) 206 | except Exception as e: 207 | self.logger.exception( 208 | f"An unexpected error occurred while fetching records for zone {zone_name}: {e}" 209 | ) 210 | 211 | return endpoints 212 | 213 | async def apply_changes(self, changes: Changes) -> None: 214 | """ 215 | Applies the specified changes to DNS records. 216 | 217 | Args: 218 | changes: Changes to apply 219 | """ 220 | if self.dry_run: 221 | self.logger.info("Dry run mode, not applying changes") 222 | return 223 | 224 | # Create new records 225 | for endpoint in changes.create: 226 | await self._create_record(endpoint) 227 | 228 | # Update existing records 229 | for i in range(len(changes.update_old)): 230 | old_endpoint = changes.update_old[i] 231 | new_endpoint = changes.update_new[i] 232 | await self._update_record(old_endpoint, new_endpoint) 233 | 234 | # Delete records 235 | for endpoint in changes.delete: 236 | await self._delete_record(endpoint) 237 | 238 | async def _create_record(self, endpoint: Endpoint) -> None: 239 | """ 240 | Creates a new DNS record. 
241 | 242 | Args: 243 | endpoint: Endpoint to create 244 | """ 245 | # Get zone ID for the endpoint 246 | zone_id = await self._get_zone_id_for_endpoint(endpoint) 247 | if not zone_id: 248 | self.logger.error(f"Could not find zone ID for endpoint {endpoint.dnsname}") 249 | return 250 | 251 | # Create record 252 | try: 253 | for target in endpoint.targets: 254 | # Prepare record data according to v4 schema 255 | # Revert TTL logic: Use endpoint TTL if provided, otherwise use 1 (Auto) 256 | record_ttl = ( 257 | endpoint.record_ttl if endpoint.record_ttl is not None else 1 258 | ) 259 | 260 | record_data = { 261 | "name": endpoint.dnsname, 262 | "type": endpoint.record_type, 263 | "content": target, 264 | "proxied": endpoint.proxied, 265 | "ttl": record_ttl, # Use original TTL logic (1=Auto) 266 | } 267 | 268 | self.logger.info( 269 | f"Creating DNS record: {endpoint.record_type} {record_data['name']} -> {record_data['content']} (TTL: {record_data.get('ttl', 'Auto')}, Proxied: {record_data['proxied']})" 270 | ) 271 | # Use create method from client.dns.records with direct keyword arguments 272 | self.cf.dns.records.create( 273 | zone_id=zone_id, 274 | name=record_data["name"], 275 | type=record_data["type"], 276 | content=record_data["content"], 277 | ttl=record_data["ttl"], 278 | proxied=record_data["proxied"], 279 | ) 280 | # Use cloudflare.APIError for specific API errors 281 | except cloudflare.APIError as e: 282 | error_code = getattr(e, "code", "N/A") 283 | error_message = getattr(e, "message", str(e)) 284 | self.logger.error( 285 | f"Cloudflare API Error creating DNS record for {endpoint.dnsname}: {e} (Code: {error_code}, Message: {error_message})" 286 | ) 287 | except cloudflare.CloudflareError as e: 288 | self.logger.error( 289 | f"General Cloudflare Error creating DNS record for {endpoint.dnsname}: {e}" 290 | ) 291 | except Exception as e: 292 | self.logger.exception( 293 | f"An unexpected error occurred while creating DNS record for {endpoint.dnsname}: {e}" 
294 | ) 295 | 296 | async def _update_record( 297 | self, old_endpoint: Endpoint, new_endpoint: Endpoint 298 | ) -> None: 299 | """ 300 | Updates an existing DNS record. 301 | 302 | Args: 303 | old_endpoint: Old endpoint 304 | new_endpoint: New endpoint 305 | """ 306 | # Get zone ID for the endpoint 307 | zone_id = await self._get_zone_id_for_endpoint(new_endpoint) 308 | if not zone_id: 309 | self.logger.error( 310 | f"Could not find zone ID for endpoint {new_endpoint.dnsname}" 311 | ) 312 | return 313 | 314 | # Get record ID 315 | # Use old_endpoint to find the record to update 316 | record_id = await self._get_record_id(zone_id, old_endpoint) 317 | if not record_id: 318 | # If old record not found, try creating the new one instead of failing 319 | self.logger.warning( 320 | f"Record ID not found for updating {old_endpoint.dnsname} ({old_endpoint.record_type}). Attempting to create instead." 321 | ) 322 | await self._create_record(new_endpoint) 323 | return 324 | 325 | # Update record 326 | try: 327 | # Ensure targets list is not empty for new_endpoint 328 | if not new_endpoint.targets: 329 | self.logger.error( 330 | f"Cannot update record {new_endpoint.dnsname}: No targets specified." 
331 | ) 332 | return 333 | 334 | # Prepare record data according to v4 schema 335 | record_ttl = ( 336 | new_endpoint.record_ttl if new_endpoint.record_ttl is not None else 1 337 | ) # Revert TTL logic (1=Auto) 338 | record_data = { 339 | "name": new_endpoint.dnsname, 340 | "type": new_endpoint.record_type, 341 | "content": new_endpoint.targets[0], # Use first target for update 342 | "proxied": new_endpoint.proxied, 343 | "ttl": record_ttl, # Use original TTL logic (1=Auto) 344 | } 345 | 346 | self.logger.info( 347 | f"Updating DNS record: {record_id} ({old_endpoint.dnsname} -> {new_endpoint.dnsname}) Type: {new_endpoint.record_type}, Content: {record_data['content']}, TTL: {record_data.get('ttl', 'Auto')}, Proxied: {record_data['proxied']})" 348 | ) 349 | # Use update method from client.dns.records with direct keyword arguments 350 | self.cf.dns.records.update( 351 | dns_record_id=record_id, 352 | zone_id=zone_id, 353 | name=record_data["name"], 354 | type=record_data["type"], 355 | content=record_data["content"], 356 | ttl=record_data["ttl"], 357 | proxied=record_data["proxied"], 358 | ) 359 | # Use cloudflare.APIError for specific API errors 360 | except cloudflare.APIError as e: 361 | error_code = getattr(e, "code", "N/A") 362 | self.logger.error( 363 | f"Cloudflare API Error updating DNS record {record_id} for {new_endpoint.dnsname}: {e} (Code: {error_code})" 364 | ) 365 | except cloudflare.CloudflareError as e: 366 | self.logger.error( 367 | f"General Cloudflare Error updating DNS record {record_id} for {new_endpoint.dnsname}: {e}" 368 | ) 369 | except Exception as e: 370 | self.logger.exception( 371 | f"An unexpected error occurred while updating DNS record for {new_endpoint.dnsname}: {e}" 372 | ) 373 | 374 | async def _delete_record(self, endpoint: Endpoint) -> None: 375 | """ 376 | Deletes a DNS record. 
377 | 378 | Args: 379 | endpoint: Endpoint to delete 380 | """ 381 | # Get zone ID for the endpoint 382 | zone_id = await self._get_zone_id_for_endpoint(endpoint) 383 | if not zone_id: 384 | self.logger.error( 385 | f"Could not find zone ID for endpoint {endpoint.dnsname} during deletion." 386 | ) 387 | return 388 | 389 | # Get record ID 390 | record_id = await self._get_record_id(zone_id, endpoint) 391 | if not record_id: 392 | self.logger.warning( 393 | f"Could not find record ID for deleting endpoint {endpoint.dnsname} ({endpoint.record_type}). Skipping deletion." 394 | ) 395 | return 396 | 397 | # Delete record 398 | try: 399 | # Include record type in the log message 400 | self.logger.info( 401 | f"Deleting DNS record: {endpoint.record_type} {endpoint.dnsname} (ID: {record_id})" 402 | ) 403 | # Use delete method from client.dns.records with zone_id and record_id kwargs 404 | self.cf.dns.records.delete(dns_record_id=record_id, zone_id=zone_id) 405 | # Use cloudflare.APIError for specific API errors 406 | except cloudflare.APIError as e: 407 | error_code = getattr(e, "code", "N/A") 408 | error_message = getattr(e, "message", str(e)) 409 | self.logger.error( 410 | f"Cloudflare API Error deleting DNS record {record_id} for {endpoint.dnsname}: {e} (Code: {error_code}, Message: {error_message})" 411 | ) 412 | except cloudflare.CloudflareError as e: 413 | self.logger.error( 414 | f"General Cloudflare Error deleting DNS record {record_id} for {endpoint.dnsname}: {e}" 415 | ) 416 | except Exception as e: 417 | self.logger.exception( 418 | f"An unexpected error occurred while deleting DNS record for {endpoint.dnsname}: {e}" 419 | ) 420 | 421 | async def _get_zone_id_for_endpoint(self, endpoint: Endpoint) -> Optional[str]: 422 | """ 423 | Gets the zone ID for an endpoint. 
424 | 425 | Args: 426 | endpoint: Endpoint 427 | 428 | Returns: 429 | Optional[str]: Zone ID 430 | """ 431 | # Extract domain from endpoint 432 | domain = self._extract_domain_from_hostname(endpoint.dnsname) 433 | self.logger.debug(f"Extracted domain: {domain}") 434 | if not domain: 435 | return None 436 | 437 | # Check cache 438 | if domain in self.zone_id_cache: 439 | return self.zone_id_cache[domain] 440 | 441 | # Get zones (returns list of dicts now, as processed in zones() method) 442 | zones = await self.zones() 443 | self.logger.debug(f"Checking against managed zones: {zones}") 444 | # Find matching zone (comparing domain against dict 'name' key) 445 | for zone in zones: 446 | zone_name = zone.get("name") 447 | zone_id = zone.get("id") 448 | if not zone_name or not zone_id: 449 | continue # Should not happen if zones() filters correctly 450 | 451 | self.logger.debug(f"Checking zone: {zone_name}") 452 | if zone_name == domain: 453 | # Zone ID is already cached by zones() call, but return it directly 454 | self.logger.debug( 455 | f"Found matching zone ID: {zone_id} for domain {domain}" 456 | ) 457 | # Ensure cache consistency (though zones() should handle this) 458 | self.zone_id_cache[domain] = zone_id 459 | return zone_id 460 | 461 | self.logger.warning( 462 | f"No matching zone found for domain: {domain} among managed zones." 463 | ) 464 | return None 465 | 466 | async def _get_record_id(self, zone_id: str, endpoint: Endpoint) -> Optional[str]: 467 | """ 468 | Gets the record ID for an endpoint. 
469 | 470 | Args: 471 | zone_id: Zone ID 472 | endpoint: Endpoint 473 | 474 | Returns: 475 | Optional[str]: Record ID 476 | """ 477 | try: 478 | # Get all DNS records for the zone using client.dns.records and zone_id kwarg 479 | dns_records_iterator = self.cf.dns.records.list( 480 | zone_id=zone_id, 481 | name=endpoint.dnsname, 482 | type=endpoint.record_type, 483 | per_page=5, # Usually only expect 1, but check a few 484 | ) 485 | dns_records = list(dns_records_iterator) 486 | 487 | # Find matching record (v4 returns objects with attributes) 488 | for record in dns_records: 489 | # Use attribute access with getattr for safety 490 | record_name = getattr(record, "name", None) 491 | record_type = getattr(record, "type", None) 492 | record_id = getattr(record, "id", None) 493 | 494 | if ( 495 | record_name == endpoint.dnsname 496 | and record_type == endpoint.record_type 497 | and record_id 498 | ): 499 | # Found the specific record 500 | return record_id 501 | 502 | self.logger.debug( 503 | f"Did not find existing record ID for {endpoint.dnsname} ({endpoint.record_type}) in zone {zone_id}" 504 | ) 505 | return None 506 | # Use cloudflare.APIError for specific API errors 507 | except cloudflare.APIError as e: 508 | error_code = getattr(e, "code", "N/A") 509 | error_message = getattr(e, "message", str(e)) 510 | self.logger.error( 511 | f"Cloudflare API Error getting record ID for {endpoint.dnsname}: {e} (Code: {error_code}, Message: {error_message})" 512 | ) 513 | return None 514 | except cloudflare.CloudflareError as e: 515 | self.logger.error( 516 | f"General Cloudflare Error getting record ID for {endpoint.dnsname}: {e}" 517 | ) 518 | return None 519 | except Exception as e: 520 | self.logger.exception( 521 | f"An unexpected error occurred while getting record ID for {endpoint.dnsname}: {e}" 522 | ) 523 | return None 524 | 525 | @staticmethod 526 | def _extract_domain_from_hostname(hostname: str) -> Optional[str]: 527 | """ 528 | Extracts the domain from a 
hostname. 529 | 530 | Args: 531 | hostname: Hostname 532 | 533 | Returns: 534 | Optional[str]: Domain 535 | """ 536 | # Split hostname into parts 537 | parts = hostname.split(".") 538 | 539 | # Handle special cases 540 | if len(parts) <= 1: 541 | return None 542 | 543 | # For hostnames with more than 2 parts, try to find the domain 544 | if len(parts) > 2: 545 | # Check for common TLDs 546 | if parts[-2] + "." + parts[-1] in [ 547 | "com.au", 548 | "co.uk", 549 | "co.nz", 550 | "co.za", 551 | "com.br", 552 | "com.mx", 553 | ]: 554 | return ".".join(parts[-3:]) 555 | 556 | # Default to last 2 parts 557 | return ".".join(parts[-2:]) 558 | 559 | # For hostnames with exactly 2 parts, use the whole hostname 560 | return hostname 561 | 562 | @staticmethod 563 | def _matches_domain_filter(domain: str, domain_filter: List[str]) -> bool: 564 | """ 565 | Check if a domain matches the domain filter. 566 | 567 | Args: 568 | domain: Domain 569 | domain_filter: Domain filter list 570 | 571 | Returns: 572 | bool: True if the domain matches any entry in the filter, False otherwise 573 | """ 574 | if not domain_filter: 575 | # If the filter list is empty, no match is possible. 576 | return False 577 | 578 | for filter_domain in domain_filter: 579 | # Check for wildcard 580 | if filter_domain.startswith("*."): 581 | # Convert wildcard to regex 582 | pattern = "^.*\\." + re.escape(filter_domain[2:]) + "$" 583 | if re.match(pattern, domain): 584 | # Found a wildcard match 585 | return True 586 | elif filter_domain == domain: 587 | # Found an exact match 588 | return True 589 | 590 | # If the loop completes without finding any match in the list 591 | return False 592 | -------------------------------------------------------------------------------- /sherpa_dns/registry/txt_registry.py: -------------------------------------------------------------------------------- 1 | """ 2 | TXT registry module for Sherpa-DNS. 
3 | 4 | This module is responsible for tracking which DNS records are managed by Sherpa-DNS 5 | using TXT records. 6 | """ 7 | 8 | import base64 9 | import logging 10 | from typing import Dict, List, Optional 11 | 12 | import cloudflare 13 | from cryptography.fernet import Fernet 14 | from cryptography.hazmat.primitives import hashes 15 | from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC 16 | 17 | from sherpa_dns.models.models import Changes, Endpoint 18 | 19 | 20 | class TXTRegistry: 21 | """ 22 | Registry that tracks DNS records using TXT records. 23 | """ 24 | 25 | def __init__( 26 | self, 27 | provider, 28 | txt_prefix: str = "sherpa-dns-", 29 | txt_owner_id: str = "default", 30 | txt_wildcard_replacement: str = "star", 31 | encrypt_txt: bool = False, 32 | encryption_key: Optional[str] = None, 33 | ): 34 | """ 35 | Initialize a TXTRegistry. 36 | 37 | Args: 38 | provider: DNS provider 39 | txt_prefix: Prefix for TXT records 40 | txt_owner_id: Owner ID for TXT records 41 | txt_wildcard_replacement: Replacement for wildcards in TXT records 42 | encrypt_txt: Whether to encrypt TXT records 43 | encryption_key: Encryption key for TXT records 44 | """ 45 | self.provider = provider 46 | self.txt_prefix = txt_prefix 47 | self.txt_owner_id = txt_owner_id 48 | self.txt_wildcard_replacement = txt_wildcard_replacement 49 | self.encrypt_txt = encrypt_txt 50 | self.encryption_key = encryption_key 51 | self.logger = logging.getLogger("sherpa-dns.registry.txt") 52 | 53 | # Initialize encryption if enabled 54 | self.fernet = None 55 | if self.encrypt_txt and self.encryption_key: 56 | self.fernet = self._create_fernet(self.encryption_key) 57 | 58 | async def records(self) -> List[Endpoint]: 59 | """ 60 | Returns a list of all DNS records managed by this Sherpa-DNS instance. 
61 | 62 | Returns: 63 | List[Endpoint]: List of endpoints 64 | """ 65 | # Get all records from provider 66 | all_records = await self.provider.records() 67 | 68 | # Get all TXT records 69 | txt_records = await self._get_txt_records() 70 | 71 | # Filter records based on TXT records 72 | managed_records = [] 73 | for record in all_records: 74 | # Get TXT record name for this record 75 | txt_record_name = self._get_txt_record_name(record) 76 | 77 | # Check if TXT record exists 78 | if txt_record_name in txt_records: 79 | # Check if TXT record is owned by this instance 80 | txt_content = txt_records[txt_record_name] 81 | if self._is_owned_by_this_instance(txt_content): 82 | # Parse TXT record content 83 | parsed_content = self._parse_txt_content(txt_content) 84 | 85 | # Update record with parsed content, specifically handling TTL='auto' 86 | if "ttl" in parsed_content: 87 | ttl_value = parsed_content["ttl"] 88 | if ttl_value == "auto": 89 | record.record_ttl = 1 90 | else: 91 | try: 92 | record.record_ttl = int(ttl_value) 93 | except ValueError: 94 | self.logger.warning( 95 | f"Could not parse TTL value '{ttl_value}' from TXT record for {record.dnsname}. Skipping TTL update." 96 | ) 97 | 98 | managed_records.append(record) 99 | 100 | return managed_records 101 | 102 | async def sync(self, changes: Changes) -> None: 103 | """ 104 | Synchronizes the desired state with the current state. 
105 | 106 | Args: 107 | changes: Changes to apply 108 | """ 109 | # Apply changes to DNS records 110 | await self.provider.apply_changes(changes) 111 | 112 | # Create TXT records for new endpoints 113 | for endpoint in changes.create: 114 | await self._create_txt_record(endpoint) 115 | 116 | # Update TXT records for updated endpoints 117 | for i in range(len(changes.update_old)): 118 | old_endpoint = changes.update_old[i] 119 | new_endpoint = changes.update_new[i] 120 | await self._update_txt_record(old_endpoint, new_endpoint) 121 | 122 | # Delete TXT records for deleted endpoints 123 | for endpoint in changes.delete: 124 | await self._delete_txt_record(endpoint) 125 | 126 | async def get_managed_endpoints(self) -> List[Endpoint]: 127 | """ 128 | Returns a list of all endpoints managed by this Sherpa-DNS instance. 129 | 130 | Returns: 131 | List[Endpoint]: List of managed endpoints 132 | """ 133 | return await self.records() 134 | 135 | def _get_txt_record_name(self, endpoint: Endpoint) -> str: 136 | """ 137 | Gets the TXT record name based on the endpoint's DNS name, 138 | applying the configured prefix. 139 | 140 | Args: 141 | endpoint: Endpoint 142 | 143 | Returns: 144 | str: TXT record name 145 | """ 146 | # Start with the prefixed name 147 | txt_name = f"{self.txt_prefix}{endpoint.dnsname}" 148 | 149 | # Replace wildcard character if present 150 | if "*" in txt_name: 151 | txt_name = txt_name.replace("*", self.txt_wildcard_replacement) 152 | self.logger.debug( 153 | f"Replaced wildcard in TXT name for {endpoint.dnsname}: {txt_name}" 154 | ) 155 | 156 | return txt_name 157 | 158 | def _get_txt_record_content(self, endpoint: Endpoint) -> str: 159 | """ 160 | Creates the content for a TXT record, optionally encrypting it. 
161 | 162 | Args: 163 | endpoint: Endpoint 164 | 165 | Returns: 166 | str: TXT record content 167 | """ 168 | content = { 169 | "heritage": "sherpa-dns", 170 | "owner": self.txt_owner_id, 171 | "resource": "docker", 172 | } 173 | 174 | if endpoint.targets: 175 | content["targets"] = ",".join(endpoint.targets) 176 | 177 | # Special handling for TTL in TXT record content 178 | if endpoint.record_ttl is not None: 179 | if endpoint.record_ttl == 1: 180 | content["ttl"] = "auto" # Represent TTL 1 as 'auto' 181 | else: 182 | content["ttl"] = str(endpoint.record_ttl) 183 | 184 | # Convert to string 185 | content_str = ",".join([f"{k}={v}" for k, v in content.items()]) 186 | 187 | if self.encrypt_txt and self.fernet: 188 | # Encrypt the content 189 | return self._encrypt_txt_content(content_str) 190 | 191 | return content_str 192 | 193 | def _encrypt_txt_content(self, content: str) -> str: 194 | """ 195 | Encrypts the TXT record content using AES-256. 196 | 197 | Args: 198 | content: TXT record content 199 | 200 | Returns: 201 | str: Encrypted TXT record content 202 | """ 203 | if not self.fernet: 204 | return content 205 | 206 | # Encrypt the content 207 | encrypted = self.fernet.encrypt(content.encode()) 208 | 209 | # Return as base64-encoded string with version prefix 210 | return f"v1:AES256:{encrypted.decode()}" 211 | 212 | def _decrypt_txt_content(self, content: str) -> Optional[str]: 213 | """ 214 | Decrypts the TXT record content. 
215 | 216 | Args: 217 | content: Encrypted TXT record content 218 | 219 | Returns: 220 | Optional[str]: Decrypted TXT record content 221 | """ 222 | if not self.fernet: 223 | return content 224 | 225 | # Check if content is encrypted 226 | if not content.startswith("v1:AES256:"): 227 | return content 228 | 229 | try: 230 | # Extract encrypted content 231 | encrypted = content[10:] 232 | 233 | # Decrypt the content 234 | decrypted = self.fernet.decrypt(encrypted.encode()) 235 | 236 | return decrypted.decode() 237 | except Exception as e: 238 | self.logger.error(f"Error decrypting TXT record content: {e}") 239 | return None 240 | 241 | def _create_fernet(self, key: str) -> Fernet: 242 | """ 243 | Creates a Fernet instance for encryption/decryption. 244 | 245 | Args: 246 | key: Encryption key 247 | 248 | Returns: 249 | Fernet: Fernet instance 250 | """ 251 | # Use PBKDF2 to derive a key from the provided key 252 | salt = b"sherpa-dns" # Fixed salt for deterministic key derivation 253 | kdf = PBKDF2HMAC( 254 | algorithm=hashes.SHA256(), length=32, salt=salt, iterations=100000 255 | ) 256 | key_bytes = kdf.derive(key.encode()) 257 | 258 | # Encode key as URL-safe base64 259 | key_base64 = base64.urlsafe_b64encode(key_bytes) 260 | 261 | return Fernet(key_base64) 262 | 263 | async def _get_txt_records(self) -> Dict[str, str]: 264 | """ 265 | Gets all TXT records from the provider. 
266 | 267 | Returns: 268 | Dict[str, str]: Dictionary of TXT record name to content 269 | """ 270 | txt_records = {} 271 | 272 | # Get managed zones 273 | zones = await self.provider.zones() 274 | 275 | for zone in zones: 276 | zone_id = zone["id"] 277 | zone_name = zone["name"] 278 | 279 | try: 280 | # Get all TXT records for the zone using client.dns.records.list 281 | dns_records_iterator = self.provider.cf.dns.records.list( 282 | zone_id=zone_id, type="TXT", per_page=100 283 | ) 284 | # Convert iterator to list 285 | dns_records = list(dns_records_iterator) 286 | 287 | for record in dns_records: 288 | # Use attribute access now since v4 returns objects 289 | record_type = getattr(record, "type", None) 290 | record_name = getattr(record, "name", None) 291 | raw_content = getattr(record, "content", None) 292 | 293 | # Check if record is a TXT record and has necessary attributes 294 | if ( 295 | record_type == "TXT" 296 | and record_name is not None 297 | and raw_content is not None 298 | ): 299 | # Strip quotes BEFORE processing (parsing or decryption) 300 | content_to_process = raw_content 301 | if raw_content.startswith('"') and raw_content.endswith('"'): 302 | content_to_process = raw_content[1:-1] 303 | 304 | processed_content = ( 305 | content_to_process # Default to stripped content 306 | ) 307 | 308 | # Decrypt content if encryption is enabled for the registry 309 | if self.encrypt_txt: 310 | # Pass the unquoted content to the decryption function 311 | processed_content = self._decrypt_txt_content( 312 | content_to_process 313 | ) 314 | 315 | # Add to dictionary if content was successfully processed/decrypted 316 | if processed_content: 317 | txt_records[record_name] = processed_content 318 | else: 319 | # Log if decryption failed (decrypt returns None on error) 320 | if self.encrypt_txt and content_to_process.startswith( 321 | "v1:AES256:" 322 | ): 323 | self.logger.warning( 324 | f"Failed to decrypt TXT record content for {record_name}. 
Record might be ignored."
                                )
            # --- zone-level error handling ---
            # Catch Cloudflare-specific API errors first (most specific).
            except cloudflare.APIError as e:
                # The SDK does not guarantee these attributes on every error
                # object, so read them defensively.
                error_code = getattr(e, "code", "N/A")
                error_message = getattr(e, "message", str(e))
                self.logger.error(
                    f"Cloudflare API Error fetching TXT records for zone {zone_name}: {e} (Code: {error_code}, Message: {error_message})"
                )
            # Catch other Cloudflare errors
            except cloudflare.CloudflareError as e:
                self.logger.error(
                    f"General Cloudflare Error fetching TXT records for zone {zone_name}: {e}"
                )
            # Catch any other unexpected errors
            except Exception as e:
                # Log the full traceback for unexpected errors; a failure in
                # one zone must not abort collection for the remaining zones.
                self.logger.exception(
                    f"An unexpected error occurred while fetching TXT records for zone {zone_name}: {e}"
                )

        return txt_records

    def _is_owned_by_this_instance(self, txt_content: str) -> bool:
        """
        Checks if a TXT record is owned by this Sherpa-DNS instance.

        Ownership requires BOTH heritage == "sherpa-dns" AND owner matching
        this instance's txt_owner_id.

        Args:
            txt_content: TXT record content (plain text, already dequoted
                and decrypted by the caller if applicable)

        Returns:
            bool: True if the TXT record is owned by this instance, False otherwise
        """
        # Parse TXT record content
        parsed_content = self._parse_txt_content(txt_content)

        # Check if heritage is sherpa-dns
        if parsed_content.get("heritage") != "sherpa-dns":
            return False

        # Check if owner is this instance
        if parsed_content.get("owner") != self.txt_owner_id:
            return False

        return True

    def _parse_txt_content(self, txt_content: str) -> Dict[str, str]:
        """
        Parses TXT record content into a dictionary.
        Format: "heritage=sherpa-dns,owner=default,resource=docker,ttl=auto"

        Args:
            txt_content: TXT record content

        Returns:
            Dict[str, str]: Parsed TXT record content (empty dict on parse error)
        """
        # Strip leading/trailing quotes if present
        if txt_content.startswith('"') and txt_content.endswith('"'):
            txt_content = txt_content[1:-1]

        parsed_content = {}
        try:
            # Use dict comprehension for parsing.
            # Parts without '=' are silently skipped; split("=", 1) keeps any
            # '=' characters inside the value intact.
            parsed_content = {
                key.strip(): value.strip()
                for part in txt_content.split(",")
                if "=" in part  # Ensure there's a separator
                for key, value in [part.split("=", 1)]  # Split only once
            }
        except ValueError as e:
            # Log error if splitting fails unexpectedly.
            # NOTE(review): with split("=", 1) guarded by the "=" membership
            # check this branch looks unreachable; kept as a defensive fallback.
            self.logger.warning(
                f"Could not parse TXT content: '{txt_content}'. Error: {e}"
            )
            return {}  # Return empty dict on error

        return parsed_content

    async def _create_txt_record(self, endpoint: Endpoint) -> None:
        """
        Creates a TXT record for an endpoint.

        Performs ownership / duplicate checks before creating, so two
        Sherpa-DNS instances sharing a zone do not clobber each other's
        ownership markers.

        Args:
            endpoint: Endpoint
        """
        # Get TXT record name
        txt_record_name = self._get_txt_record_name(endpoint)

        # Get TXT record content
        txt_record_content = self._get_txt_record_content(endpoint)

        # Create TXT endpoint (content wrapped in literal quotes)
        txt_endpoint = Endpoint(
            dnsname=txt_record_name,
            targets=[f'"{txt_record_content}"'],
            record_type="TXT",
        )

        # Check if TXT record already exists
        try:
            # Get zone ID for the endpoint
            zone_id = await self.provider._get_zone_id_for_endpoint(txt_endpoint)
            if zone_id:
                # Check if record already exists
                existing_record_id = await self.provider._get_record_id(zone_id, txt_endpoint)
                if existing_record_id:
                    # Get the existing TXT record content to check ownership
                    try:
                        dns_records_iterator = self.provider.cf.dns.records.list(
                            zone_id=zone_id,
                            name=txt_record_name,
                            type="TXT",
                            per_page=5,
                        )
                        existing_records = list(dns_records_iterator)

                        if existing_records:
                            # Get the content of the existing record
                            existing_content = getattr(existing_records[0], "content", "")
                            # Strip quotes if present
                            if existing_content.startswith('"') and existing_content.endswith('"'):
                                existing_content = existing_content[1:-1]

                            # Decrypt content if encryption is enabled
                            if self.encrypt_txt:
                                decrypted_content = self._decrypt_txt_content(existing_content)
                                if decrypted_content:
                                    existing_content = decrypted_content
                                else:
                                    self.logger.warning(
                                        f"Failed to decrypt TXT record content for {txt_record_name}. "
                                        f"Skipping ownership check and proceeding with creation."
                                    )
                                    # If we can't decrypt, assume we don't own it to be safe
                                    existing_content = ""

                            # Check if this instance owns the existing record
                            if existing_content and self._is_owned_by_this_instance(existing_content):
                                self.logger.warning(
                                    f"TXT record {txt_record_name} already exists and is owned by this instance (ID: {existing_record_id}). "
                                    f"This may indicate a cleanup issue. Updating existing record instead."
                                )
                                # Update the existing record since we own it.
                                # NOTE(review): the same endpoint object is passed as
                                # both old and new — confirm _update_record() resolves
                                # the record ID by name/type in that case.
                                await self.provider._update_record(txt_endpoint, txt_endpoint)
                                return
                            elif existing_content:
                                self.logger.warning(
                                    f"TXT record {txt_record_name} already exists but is owned by another instance. "
                                    f"Skipping creation to avoid conflicts."
                                )
                                return
                            else:
                                self.logger.warning(
                                    f"TXT record {txt_record_name} exists but could not determine ownership. "
                                    f"Skipping creation to avoid conflicts."
                                )
                                return
                    except Exception as e:
                        self.logger.warning(
                            f"Error checking ownership of existing TXT record {txt_record_name}: {e}. "
                            f"Proceeding with creation."
                        )

                # Check for multiple TXT records with the same name (duplicates)
                try:
                    dns_records_iterator = self.provider.cf.dns.records.list(
                        zone_id=zone_id,
                        name=txt_record_name,
                        type="TXT",
                        per_page=10,  # Check for multiple records
                    )
                    existing_records = list(dns_records_iterator)

                    if len(existing_records) > 1:
                        self.logger.warning(
                            f"Found {len(existing_records)} duplicate TXT records for {txt_record_name}. "
                            f"Checking ownership before cleanup."
                        )

                        # Only delete duplicates that are owned by this instance
                        records_to_delete = []
                        for record in existing_records:
                            record_id = getattr(record, "id", None)
                            record_content = getattr(record, "content", "")

                            if record_id and record_content:
                                # Strip quotes if present
                                if record_content.startswith('"') and record_content.endswith('"'):
                                    record_content = record_content[1:-1]

                                # Decrypt content if encryption is enabled
                                if self.encrypt_txt:
                                    decrypted_content = self._decrypt_txt_content(record_content)
                                    if decrypted_content:
                                        record_content = decrypted_content
                                    else:
                                        self.logger.warning(
                                            f"Failed to decrypt TXT record {record_id} content. "
                                            f"Skipping deletion to be safe."
                                        )
                                        continue

                                # Only delete if we own this record
                                if self._is_owned_by_this_instance(record_content):
                                    records_to_delete.append(record_id)
                                else:
                                    self.logger.info(
                                        f"Skipping deletion of TXT record {record_id} - owned by another instance"
                                    )

                        # Delete only our own duplicate records
                        for record_id in records_to_delete:
                            try:
                                # NOTE: synchronous SDK call (not awaited), matching
                                # the list() calls above.
                                self.provider.cf.dns.records.delete(
                                    dns_record_id=record_id,
                                    zone_id=zone_id
                                )
                                self.logger.info(f"Deleted duplicate TXT record {record_id} (owned by this instance)")
                            except Exception as e:
                                self.logger.error(f"Failed to delete duplicate TXT record {record_id}: {e}")

                        # Now create the new record
                        await self.provider._create_record(txt_endpoint)
                        return

                except Exception as e:
                    self.logger.warning(
                        f"Error checking for duplicate TXT records {txt_record_name}: {e}. "
                        f"Proceeding with creation."
                    )

        except Exception as e:
            self.logger.warning(
                f"Error checking for existing TXT record {txt_record_name}: {e}. "
                f"Proceeding with creation."
            )

        # Create TXT record (only if it doesn't exist or we don't own existing ones)
        await self.provider._create_record(txt_endpoint)

    async def _update_txt_record(
        self, old_endpoint: Endpoint, new_endpoint: Endpoint
    ) -> None:
        """
        Updates a TXT record for an endpoint.

        If the record name changed, the old record is deleted and a new one
        created; otherwise the record is updated in place.

        Args:
            old_endpoint: Old endpoint
            new_endpoint: New endpoint
        """
        # Get old TXT record name
        old_txt_record_name = self._get_txt_record_name(old_endpoint)

        # Get new TXT record name
        new_txt_record_name = self._get_txt_record_name(new_endpoint)

        # Get TXT record content
        txt_record_content = self._get_txt_record_content(new_endpoint)

        # Create new TXT endpoint
        new_txt_endpoint = Endpoint(
            dnsname=new_txt_record_name,
            targets=[f'"{txt_record_content}"'],
            record_type="TXT",
        )

        # If TXT record name has changed, delete old TXT record and create new one
        if old_txt_record_name != new_txt_record_name:
            # Create old TXT endpoint (empty targets: only name/type are
            # needed to locate the record for deletion)
            old_txt_endpoint = Endpoint(
                dnsname=old_txt_record_name, targets=[], record_type="TXT"
            )

            # Delete old TXT record
            await self.provider._delete_record(old_txt_endpoint)

            # Create new TXT record
            await self.provider._create_record(new_txt_endpoint)
        else:
            # Create old TXT endpoint
            old_txt_endpoint = Endpoint(
                dnsname=old_txt_record_name, targets=[], record_type="TXT"
            )

            # Update TXT record in place (same name)
            await self.provider._update_record(old_txt_endpoint, new_txt_endpoint)

    async def _delete_txt_record(self, endpoint: Endpoint) -> None:
        """
        Deletes a TXT record for an endpoint.

        Args:
            endpoint: Endpoint
        """
        # Get TXT record name
        txt_record_name = self._get_txt_record_name(endpoint)

        # Create TXT endpoint (empty targets: only name/type are needed to
        # locate the record for deletion)
        txt_endpoint = Endpoint(dnsname=txt_record_name, targets=[], record_type="TXT")

        # Delete TXT record
        await self.provider._delete_record(txt_endpoint)
--------------------------------------------------------------------------------
/sherpa_dns/source/docker_container.py:
--------------------------------------------------------------------------------
"""
Docker container source module for Sherpa-DNS.

This module is responsible for fetching metadata from Docker containers and
extracting DNS configuration from labels.
"""

import asyncio
import concurrent.futures
import ipaddress
import logging
from typing import Dict, List, Optional, Set

from docker.models.containers import Container

import docker
from sherpa_dns.models.models import Endpoint


class DockerContainerSource:
    """
    Source that fetches metadata from Docker containers.
    """

    def __init__(
        self, label_prefix: str = "sherpa.dns", label_filter: Optional[str] = None
    ):
        """
        Initialize a DockerContainerSource.

        Args:
            label_prefix: Prefix for DNS labels
            label_filter: Filter for container labels
        """
        self.label_prefix = label_prefix
        self.label_filter = label_filter
        self.logger = logging.getLogger("sherpa-dns.source.docker")
        # Queue bridging the blocking event-listener thread and the asyncio side.
        self.event_queue: asyncio.Queue = asyncio.Queue()

        # Initialize Docker client with explicit configuration:
        # first via environment (DOCKER_HOST etc.), then via the default
        # Unix socket as a fallback.
        try:
            self.docker_client = docker.from_env()
            # Test connection
            self.docker_client.ping()
            self.logger.debug("Successfully connected to Docker daemon")
        except docker.errors.DockerException as e:
            self.logger.error(f"Error connecting to Docker daemon: {e}")
            # Try with explicit socket path
            try:
                self.logger.debug("Trying to connect with explicit socket path")
                self.docker_client = docker.DockerClient(
                    base_url="unix://var/run/docker.sock"
                )
                self.docker_client.ping()
                self.logger.debug(
                    "Successfully connected to Docker daemon with explicit socket path"
                )
            except docker.errors.DockerException as inner_e:
                self.logger.error(
                    f"Error connecting to Docker daemon with explicit socket path: {inner_e}"
                )
                # Initialize with None and retry later
                self.docker_client = None

    async def endpoints(self) -> List[Endpoint]:
        """
        Returns a list of endpoint objects representing desired DNS records
        based on running containers with appropriate labels.

        Returns:
            List[Endpoint]: List of endpoints (empty if the daemon is unreachable)
        """
        endpoints = []

        # Try to reconnect if docker_client is None
        if self.docker_client is None:
            if not await self._reconnect_docker_client():
                return []  # Failed to reconnect

        try:
            # Get all running containers
            # Note: This list call is synchronous, potentially blocking. Consider executor if becomes an issue.
            containers = self.docker_client.containers.list(
                filters={"status": "running"}
            )

            for container in containers:
                # Filter containers by labels if filter is specified
                if self.label_filter and not self._matches_filter(
                    container.labels, self.label_filter
                ):
                    continue

                # Extract DNS configuration from labels
                container_endpoints = self._endpoints_from_container(container)
                endpoints.extend(container_endpoints)

            return endpoints
        except docker.errors.DockerException as e:
            self.logger.error(f"Error fetching containers: {e}")
            return []

    async def _reconnect_docker_client(self) -> bool:
        """Attempts to reconnect the docker client. Returns True on success, False otherwise.

        Mirrors __init__'s strategy: environment-based client first, then the
        explicit Unix-socket path.
        """
        if self.docker_client is not None:
            return True  # Already connected

        self.logger.info("Attempting to reconnect to Docker daemon")
        try:
            # Try default first
            self.docker_client = docker.from_env()
            self.docker_client.ping()
            self.logger.debug("Successfully reconnected to Docker daemon (default)")
            return True
        except docker.errors.DockerException:
            self.logger.warning(
                "Failed reconnecting with default, trying explicit socket path"
            )
            try:
                # Try explicit path
                self.docker_client = docker.DockerClient(
                    base_url="unix://var/run/docker.sock"
                )
                self.docker_client.ping()
                self.logger.debug(
                    "Successfully reconnected to Docker daemon with explicit socket path"
                )
                return True
            except docker.errors.DockerException as e:
                self.logger.error(
                    f"Error reconnecting to Docker daemon with explicit socket path: {e}"
                )
                self.docker_client = None  # Ensure it's None on failure
                return False

    def _blocking_event_listener(self, loop: asyncio.AbstractEventLoop):
        """
        Runs in a separate thread to listen for Docker events.
        This method should not be async as it runs in an executor thread.

        Args:
            loop: The asyncio event loop owning self.event_queue; events are
                handed over via run_coroutine_threadsafe.
        """
        try:
            if self.docker_client is None:
                self.logger.error(
                    "Event listener thread: Docker client is None. Cannot start."
                )
                return

            self.logger.debug("Event listener thread polling Docker API for events.")
            # Note: events() is blocking when iterated
            events = self.docker_client.events(
                decode=True, filters={"type": "container"}
            )
            for event in events:
                event_type = event.get("status")
                if event_type in [
                    "start",
                    "die",
                    "stop",
                    "kill",
                    "pause",
                    "unpause",
                ]:  # Added more events just in case
                    # Put event onto the asyncio queue from the thread.
                    # run_coroutine_threadsafe is the only safe way to touch
                    # the loop from this worker thread.
                    future = asyncio.run_coroutine_threadsafe(
                        self.event_queue.put(event), loop
                    )
                    try:
                        # Wait briefly for the put to complete to avoid overwhelming queue on burst
                        future.result(timeout=5)
                        self.logger.debug(
                            f"Event listener thread: Queued event {event_type} - {event.get('id', '')[:12]}"
                        )
                    except (concurrent.futures.TimeoutError, asyncio.TimeoutError):
                        self.logger.warning(
                            "Event listener thread: Timeout waiting for event queue put."
                        )
                    except Exception as e:
                        # Catch specific exceptions if possible, e.g., queue full?
                        self.logger.error(
                            f"Event listener thread: Error getting result from queue put: {e}"
                        )
                else:
                    # Log other events at debug level if needed
                    # self.logger.debug(f"Event listener thread: Ignoring event type {event_type}")
                    pass

        except docker.errors.APIError as e:
            # Handle API errors specifically (e.g., connection lost during stream)
            self.logger.error(
                f"Event listener thread: Docker APIError: {e}. Listener stopping."
            )
            # Consider resetting docker_client to None here to force reconnect attempt
            self.docker_client = None
        except docker.errors.DockerException as e:
            # Catch other docker exceptions
            self.logger.error(
                f"Event listener thread: DockerException: {e}. Listener stopping."
            )
            self.docker_client = None
        except Exception as e:
            # Catch any other unexpected errors in the thread
            self.logger.exception(
                f"Event listener thread: Unexpected error: {e}. Listener stopping."
            )
        finally:
            # This block ensures the log message is printed even if events() returns (e.g., daemon stopped)
            self.logger.info("Event listener thread finished.")

    async def watch_events(self) -> None:
        """
        Watch Docker events non-blockingly using a separate thread.
        Includes reconnection logic for the listener thread.

        Runs forever until the surrounding task is cancelled; the blocking
        listener is restarted (after a 10s back-off) whenever it exits.
        """
        self.logger.debug(
            "Docker source event watcher task started (using thread executor)."
        )

        while True:  # Keep trying to run the listener thread
            if not await self._reconnect_docker_client():
                self.logger.info(
                    "Event watcher task: Failed to connect to Docker. Retrying in 10 seconds..."
                )
                await asyncio.sleep(10)
                continue  # Go back to start of while loop to try reconnecting

            loop = asyncio.get_running_loop()
            listener_future = None
            try:
                # Run the blocking listener in a thread pool executor
                # self._blocking_event_listener requires the loop argument now
                listener_future = loop.run_in_executor(
                    None, self._blocking_event_listener, loop
                )
                await listener_future  # Wait for the listener thread to complete (which it shouldn't unless error/daemon stop)

                # If we get here, the listener thread exited. Log and prepare to restart.
                self.logger.warning(
                    "Event listener thread exited. Will attempt restart after delay."
                )
                # The thread itself should have logged the reason for exit.
                # Ensure docker_client is reset if the thread failed due to Docker issues.
                if self.docker_client is not None:
                    try:
                        # Quick check if connection is still valid
                        self.docker_client.ping()
                    except docker.errors.DockerException:
                        self.logger.info(
                            "Docker connection seems lost, resetting client."
                        )
                        self.docker_client = None

            except concurrent.futures.CancelledError:
                # Since Python 3.8 concurrent.futures.CancelledError is an alias
                # of asyncio.CancelledError, so task cancellation lands here.
                self.logger.info("Event watcher task cancelled.")
                # If the task was cancelled, cancel the executor future too
                if listener_future and not listener_future.done():
                    listener_future.cancel()
                break  # Exit the loop if the watcher task is cancelled
            except Exception as e:
                # Catch errors related to starting or managing the executor task
                self.logger.exception(
                    f"Error running or awaiting event listener thread: {e}"
                )
                # Consider if docker_client needs reset here too
                self.docker_client = None

            # Wait before restarting the listener attempt
            self.logger.info(
                "Waiting 10 seconds before attempting to restart listener thread..."
            )
            await asyncio.sleep(10)

    def _endpoints_from_container(self, container: Container) -> List[Endpoint]:
        """
        Generate endpoints from a single container's labels.

        Args:
            container: Docker container

        Returns:
            List[Endpoint]: List of endpoints
        """
        endpoints = []

        # Get container details
        container_id = container.id
        container_name = container.name
        container_labels = container.labels

        # Find all DNS hostnames defined in labels
        hostnames = self._get_hostnames_from_labels(container_labels)

        for hostname in hostnames:
            # Get DNS configuration from labels
            record_type = self._get_label_value(container_labels, hostname, "type", "A")
            ttl_str = self._get_label_value(container_labels, hostname, "ttl", None)
            proxied_str = self._get_label_value(
                container_labels, hostname, "proxied", "false"
            )
            target = self._get_label_value(container_labels, hostname, "target", None)
            network_name = self._get_label_value(
                container_labels, hostname, "network", None
            )  # Get network name from label

            # Convert TTL to integer; non-numeric values silently fall back
            # to None (provider default)
            record_ttl = int(ttl_str) if ttl_str and ttl_str.isdigit() else None

            # Convert proxied to boolean
            proxied = proxied_str.lower() == "true"

            # Get targets
            targets = []
            if target:
                # If a specific target is provided, use it directly
                targets = [target]
            elif record_type in ["A", "AAAA"]:
                # For A/AAAA records, find container IP on the specified or default network
                container_ip = self._get_container_ip(
                    container, network_name
                )  # Pass network_name
                if container_ip:
                    # Check if IP matches record type (IPv4 for A, IPv6 for AAAA).
                    # _get_container_ip only returns validated IPs, so this parse
                    # cannot raise here.
                    ip_obj = ipaddress.ip_address(container_ip)
                    if record_type == "A" and isinstance(ip_obj, ipaddress.IPv4Address):
                        targets = [container_ip]
                    elif record_type == "AAAA" and isinstance(
                        ip_obj, ipaddress.IPv6Address
                    ):
                        targets = [container_ip]
                    else:
                        self.logger.warning(
                            f"IP address {container_ip} type mismatch for record type {record_type} on container {container_name}. Skipping."
                        )

            elif record_type == "CNAME":
                # For CNAME records, use container name as target if not specified
                # Note: Using container name might not be resolvable outside the Docker host's context.
                # Consider requiring an explicit target for CNAME or using a service discovery mechanism.
                targets = [
                    f"{container_name}"
                ]  # Default CNAME target to container name

            # Create endpoint if targets were determined
            if targets:
                endpoint = Endpoint(
                    dnsname=hostname,
                    targets=targets,
                    record_type=record_type,
                    record_ttl=record_ttl,
                    proxied=proxied,
                    container_id=container_id,  # Pass container ID
                    container_name=container_name,  # Pass container name for reference/logging
                )
                endpoints.append(endpoint)
                self.logger.debug(f"Created endpoint: {endpoint}")
            else:
                # Log why no endpoint was created (e.g., no suitable IP found)
                self.logger.warning(
                    f"No suitable target found for hostname {hostname} (Type: {record_type}, Network: {network_name or 'default'}) in container {container_name} ({container_id[:12]})"
                )

        return endpoints

    def _get_hostnames_from_labels(self, labels: Dict[str, str]) -> Set[str]:
        """
        Get all hostnames defined in container labels using the configured prefix.
        Handles both single 'hostname' label and multiple 'hostname.alias' labels.

        Args:
            labels: Container labels dictionary

        Returns:
            Set[str]: A set of unique hostnames found in labels.
        """
        hostnames = set()
        # Example: sherpa.dns/hostname=app.example.com
        hostname_label = f"{self.label_prefix}/hostname"
        if hostname_label in labels:
            # Support comma-separated hostnames in the main label
            names = [
                name.strip()
                for name in labels[hostname_label].split(",")
                if name.strip()
            ]
            hostnames.update(names)

        # Example: sherpa.dns/hostname.web=web.example.com, sherpa.dns/hostname.api=api.example.com
        hostname_prefix = f"{self.label_prefix}/hostname."
        for label, value in labels.items():
            if label.startswith(hostname_prefix):
                # The part after prefix is the alias, the value is the hostname
                # Support comma-separated hostnames in alias labels too
                names = [name.strip() for name in value.split(",") if name.strip()]
                hostnames.update(names)

        if not hostnames:
            self.logger.debug(
                "No hostnames found in labels using prefix %s", self.label_prefix
            )

        return hostnames

    def _get_label_value(
        self,
        labels: Dict[str, str],
        hostname: str,
        key: str,
        default: Optional[str] = None,
    ) -> Optional[str]:
        """
        Get a label value for a specific hostname and key, supporting aliases.
        Looks for, in order:
        1. <label_prefix>/<key>.<alias> (if hostname was defined via a
           hostname.<alias> label)
        2. <label_prefix>/<key>.<hostname> (hostname-specific key) - currently
           disabled / commented out below
        3. <label_prefix>/<key> (generic key shared by all hostnames)

        Args:
            labels: Container labels
            hostname: Hostname (which might be a value from hostname.alias label)
            key: Label key (e.g., 'ttl', 'type')
            default: Default value

        Returns:
            Optional[str]: Label value or default.
        """

        # Find if the hostname was defined via an alias label (e.g., sherpa.dns/hostname.web=...)
        alias_key = None
        hostname_alias_prefix = f"{self.label_prefix}/hostname."
        for label, value in labels.items():
            if label.startswith(hostname_alias_prefix):
                # Check if the current hostname is among the values of this alias label
                defined_hostnames = {
                    name.strip() for name in value.split(",") if name.strip()
                }
                if hostname in defined_hostnames:
                    alias_key = label[
                        len(hostname_alias_prefix) :
                    ]  # Extract the alias (e.g., 'web')
                    break  # Found the alias that defines this hostname

        # 1. Check for alias-specific key (e.g., sherpa.dns/ttl.web=60) - Preferred for aliased hostnames
        if alias_key:
            alias_specific_key = f"{self.label_prefix}/{key}.{alias_key}"
            if alias_specific_key in labels:
                return labels[alias_specific_key]

        # 2. Check for hostname-specific key (e.g., sherpa.dns/ttl.app.example.com=60) - Less common/maybe deprecated?
        # This style might conflict if hostname contains '.' - might need adjustment
        # hostname_specific_key = f"{self.label_prefix}/{key}.{hostname}"
        # if hostname_specific_key in labels:
        #     return labels[hostname_specific_key]

        # 3. Check for generic key (e.g., sherpa.dns/ttl=300) - Fallback for all hostnames on the container
        generic_key = f"{self.label_prefix}/{key}"
        if generic_key in labels:
            return labels[generic_key]

        return default

    def _get_container_ip(
        self, container: Container, network_name: Optional[str] = None
    ) -> Optional[str]:
        """
        Get the IP address of a container, optionally specifying a network.
        Prefers IPv4 if available on the specified network.

        Network selection when no network_name is given: the single network
        if there is only one, otherwise 'bridge' if present, otherwise the
        alphabetically-first network.

        Args:
            container: Docker container object.
            network_name: Optional name of the Docker network.

        Returns:
            Optional[str]: IP address (string) or None if not found/error.
        """
        try:
            # Reload container attributes to get fresh network settings
            container.reload()
            networks = container.attrs.get("NetworkSettings", {}).get("Networks", {})

            if not networks:
                self.logger.warning(
                    f"No network settings found for container {container.name}"
                )
                return None

            target_network = None
            if network_name:
                if network_name in networks:
                    target_network = networks[network_name]
                else:
                    self.logger.warning(
                        f"Network '{network_name}' not found for container {container.name}. Available: {list(networks.keys())}"
                    )
                    return None
            elif len(networks) == 1:
                # If only one network, use it
                target_network = list(networks.values())[0]
            else:
                # If multiple networks and none specified, try common defaults or just pick one?
                # Trying 'bridge' or the first one alphabetically might be heuristics.
                # For now, let's prioritize bridge, then pick the first if no specific requested.
                if "bridge" in networks:
                    target_network = networks["bridge"]
                    self.logger.debug(
                        f"Multiple networks found for {container.name}, using default 'bridge'. Specify label '{self.label_prefix}/network' if needed."
                    )
                else:
                    first_network_name = sorted(networks.keys())[0]
                    target_network = networks[first_network_name]
                    self.logger.debug(
                        f"Multiple networks found for {container.name}, using first network '{first_network_name}'. Specify label '{self.label_prefix}/network' if needed."
                    )

            if target_network:
                ip_address = target_network.get("IPAddress")
                if ip_address and self._is_valid_ip(
                    ip_address
                ):  # Ensure it's a valid, non-empty IP
                    return ip_address
                else:
                    # Check for IPv6 if IPv4 is missing/invalid
                    ip_address_v6 = target_network.get("GlobalIPv6Address")
                    if ip_address_v6 and self._is_valid_ip(ip_address_v6):
                        return ip_address_v6
                    else:
                        self.logger.warning(
                            f"No valid IP address found for container {container.name} on network {network_name or 'selected network'}. Network details: {target_network}"
                        )

        except docker.errors.NotFound:
            self.logger.error(
                f"Container {container.name} not found during IP address retrieval."
            )
        except Exception as e:
            self.logger.exception(
                f"Error getting IP address for container {container.name}: {e}"
            )

        return None

    @staticmethod
    def _is_valid_ip(ip: str) -> bool:
        """Check if the string is a valid non-empty IP address (v4 or v6)."""
        if not ip:  # Check for empty string
            return False
        try:
            ipaddress.ip_address(ip)
            return True
        except ValueError:
            return False

    @staticmethod
    def _matches_filter(labels: Dict[str, str], label_filter: str) -> bool:
        """
        Check if container labels match the filter expression.
        Filter format: "key=value" or "key".

        Args:
            labels: Container labels
            label_filter: Filter expression

        Returns:
            bool: True if labels match the filter, False otherwise
        """
        if "=" in label_filter:
            # "key=value" form: exact value match (split only on the first '=')
            key, value = label_filter.split("=", 1)
            return labels.get(key) == value
        else:
            # Check for key existence
            return label_filter in labels
--------------------------------------------------------------------------------
/sherpa_dns/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stedrow/sherpa-dns/3f6c987a2d5fcaccfb5371f42e60e3c2c522ae41/sherpa_dns/utils/__init__.py
--------------------------------------------------------------------------------
/sherpa_dns/utils/cleanup_tracker.py:
--------------------------------------------------------------------------------
"""
Cleanup tracker module for Sherpa-DNS.

This module is responsible for tracking DNS records that are pending deletion
and determining when they are eligible for deletion.
"""

import logging
import re
import time
from typing import Dict, List


class CleanupTracker:
    """
    Tracks DNS records that are pending deletion and determines when they are eligible for deletion.
    """

    def __init__(self, delay: str = "15m"):
        """
        Initialize a CleanupTracker.

        Args:
            delay: Delay before records are eligible for deletion (e.g., 15m, 1h, 30s)
        """
        # NOTE: _parse_duration runs before self.logger is assigned below, so
        # it must not rely on instance state.
        self.delay = self._parse_duration(delay)
        self.original_delay_str = delay  # Store original string for logging
        self.pending_deletions: Dict[str, float] = {}  # Map of record ID to timestamp
        self.logger = logging.getLogger("sherpa-dns.cleanup-tracker")
        self.logger.debug(
            f"Cleanup delay set to {self.delay} seconds ({self.original_delay_str})"
        )

    def mark_for_deletion(self, record_id: str) -> None:
        """
        Mark a record for deletion with the current timestamp.

        Idempotent: re-marking keeps the original timestamp so the delay is
        not restarted by repeated reconcile loops.

        Args:
            record_id: Record ID to mark for deletion
        """
        if record_id not in self.pending_deletions:
            self.pending_deletions[record_id] = time.time()
            self.logger.info(
                f"Marked record {record_id} for deletion (eligible in {self.original_delay_str})"
            )
        else:
            self.logger.debug(f"Record {record_id} is already marked for deletion.")

    def unmark_for_deletion(self, record_id: str) -> None:
        """
        Remove deletion mark if record is active again.

        Args:
            record_id: Record ID to unmark for deletion
        """
        if record_id in self.pending_deletions:
            del self.pending_deletions[record_id]
            self.logger.info(f"Unmarked record {record_id} for deletion")

    def get_eligible_for_deletion(self) -> List[str]:
        """
        Get records that have been pending deletion for longer than the delay.

        Eligible records are consumed: they are removed from the pending map
        and returned exactly once.

        Returns:
            List[str]: List of record IDs eligible for deletion
        """
        eligible = []
        now = time.time()

        # Iterate over a snapshot (list(...)) so entries can be deleted from
        # the dict while iterating.
        for record_id, timestamp in list(self.pending_deletions.items()):
            elapsed = now - timestamp
            if elapsed >= self.delay:
                self.logger.debug(
                    f"Record {record_id} eligible for deletion (elapsed: {elapsed:.2f}s >= delay: {self.delay}s)"
                )
                eligible.append(record_id)
                del self.pending_deletions[record_id]
                self.logger.info(f"Record {record_id} is eligible for deletion")
            else:
                self.logger.debug(
                    f"Record {record_id} still pending deletion (elapsed: {elapsed:.2f}s < delay: {self.delay}s)"
                )

        return eligible

    def get_pending_status(self) -> Dict[str, float]:
        """Returns a dictionary mapping pending record IDs to remaining seconds.
        Remaining time is positive if deletion is pending, negative if overdue (shouldn't happen often).
        """
        status = {}
        now = time.time()
        for record_id, timestamp in self.pending_deletions.items():
            time_until_eligible = (timestamp + self.delay) - now
            status[record_id] = time_until_eligible
        return status

    def _parse_duration(self, duration_str: str) -> int:
        """
        Parse a duration string like '15m' into seconds.
100 | 101 | Args: 102 | duration_str: Duration string 103 | 104 | Returns: 105 | int: Duration in seconds 106 | """ 107 | if not duration_str: 108 | return 15 * 60 # Default to 15 minutes 109 | 110 | # Pattern for duration string (e.g., 15m, 1h, 30s) 111 | pattern = r"^(\d+)([smhd])$" 112 | match = re.match(pattern, duration_str) 113 | 114 | if not match: 115 | return 15 * 60 # Default to 15 minutes 116 | 117 | value, unit = match.groups() 118 | value = int(value) 119 | 120 | # Convert to seconds 121 | if unit == "s": 122 | return value 123 | elif unit == "m": 124 | return value * 60 125 | elif unit == "h": 126 | return value * 60 * 60 127 | elif unit == "d": 128 | return value * 60 * 60 * 24 129 | 130 | return 15 * 60 # Default to 15 minutes 131 | -------------------------------------------------------------------------------- /sherpa_dns/utils/health.py: -------------------------------------------------------------------------------- 1 | """ 2 | Health check module for Sherpa-DNS. 3 | 4 | This module provides health check endpoints for monitoring the application. 5 | """ 6 | 7 | import json 8 | import logging 9 | from http.server import BaseHTTPRequestHandler, HTTPServer 10 | from threading import Thread 11 | 12 | import docker 13 | 14 | 15 | class HealthCheckHandler(BaseHTTPRequestHandler): 16 | """ 17 | HTTP request handler for health check endpoints. 18 | """ 19 | 20 | def __init__(self, *args, **kwargs): 21 | self.logger = logging.getLogger("sherpa-dns.health") 22 | super().__init__(*args, **kwargs) 23 | 24 | def do_GET(self): 25 | """ 26 | Handle GET requests. 27 | """ 28 | if self.path == "/health": 29 | self._handle_health_check() 30 | elif self.path == "/metrics": 31 | self._handle_metrics() 32 | else: 33 | self.send_response(404) 34 | self.end_headers() 35 | self.wfile.write(b"Not Found") 36 | 37 | def _handle_health_check(self): 38 | """ 39 | Handle health check requests. 
class HealthCheckServer:
    """
    HTTP server for health check endpoints.

    Serves requests from a background daemon thread so it never blocks
    process shutdown.
    """

    def __init__(self, host: str = "0.0.0.0", port: int = 8080):
        """
        Initialize a HealthCheckServer.

        Args:
            host: Host to bind to
            port: Port to bind to
        """
        self.host = host
        self.port = port
        self.server = None
        self.thread = None
        self.logger = logging.getLogger("sherpa-dns.health")

    def start(self):
        """
        Bind the HTTP server and begin serving in a daemon thread.
        """
        self.server = HTTPServer((self.host, self.port), HealthCheckHandler)
        # daemon=True in the constructor is equivalent to setting .daemon
        # before start(); the thread dies with the main process.
        self.thread = Thread(target=self.server.serve_forever, daemon=True)
        self.thread.start()
        self.logger.info(f"Health check: {self.host}:{self.port}/health")

    def stop(self):
        """
        Stop the health check server.
        """
        if not self.server:
            # start() was never called — nothing to shut down.
            return
        self.server.shutdown()
        self.server.server_close()
        self.logger.info("Health check server stopped")