├── .github ├── dependabot.yml └── workflows │ ├── autotag.yaml │ ├── release-container-image.yaml │ └── shellcheck.yaml ├── README.md ├── ansible ├── Readme.md ├── playbook.yml └── roles │ └── vault-raft-backup-agent │ ├── defaults │ └── main.yml │ ├── handlers │ └── main.yml │ ├── tasks │ └── main.yml │ └── templates │ └── etc │ ├── systemd │ └── system │ │ ├── vault-raft-backup-agent.service.j2 │ │ ├── vault-snap-agent.service.j2 │ │ └── vault-snap-agent.timer.j2 │ └── vault.d │ └── vault_snapshot_agent.hcl.j2 ├── kubernetes ├── Dockerfile ├── README.md ├── cronjob.yaml ├── serviceaccount.yaml └── vault-snapshot.sh └── terraform ├── Readme.md ├── outputs.tf ├── terraform.tf ├── terraform.tfvars └── variables.tf /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Update Dockerfile 4 | - package-ecosystem: "docker" 5 | directory: "/" 6 | schedule: 7 | interval: "daily" 8 | commit-message: 9 | prefix: "fix: " 10 | 11 | # Maintain dependencies for GitHub Actions 12 | - package-ecosystem: "github-actions" 13 | directory: "/" 14 | schedule: 15 | interval: "daily" 16 | commit-message: 17 | prefix: "chore(ci): " 18 | open-pull-requests-limit: 10 19 | -------------------------------------------------------------------------------- /.github/workflows/autotag.yaml: -------------------------------------------------------------------------------- 1 | name: Bump version 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout 11 | uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | 15 | - name: Run go-semantic-release 16 | id: semrel 17 | uses: go-semantic-release/action@v1.24 18 | with: 19 | github-token: ${{ secrets.PAT }} 20 | allow-initial-development-versions: true 21 | -------------------------------------------------------------------------------- /.github/workflows/release-container-image.yaml: 
-------------------------------------------------------------------------------- 1 | name: Release Container Image 2 | 3 | on: 4 | schedule: 5 | - cron: '3 3 * * *' 6 | pull_request: 7 | push: 8 | branches: master 9 | tags: 10 | - 'v*.*.*' 11 | 12 | jobs: 13 | container: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | 19 | - name: Docker meta 20 | id: meta 21 | uses: docker/metadata-action@v5 22 | with: 23 | # list of Docker images to use as base name for tags 24 | images: | 25 | ghcr.io/adfinis/vault-snapshot 26 | # generate Docker tags based on the following events/attributes 27 | tags: | 28 | type=schedule 29 | type=ref,event=branch 30 | type=ref,event=pr 31 | type=semver,pattern={{version}} 32 | type=semver,pattern={{major}}.{{minor}} 33 | type=semver,pattern={{major}} 34 | type=sha 35 | 36 | - name: Set up QEMU 37 | uses: docker/setup-qemu-action@v3 38 | 39 | - name: Set up Docker Buildx 40 | uses: docker/setup-buildx-action@v3 41 | 42 | - name: Login to GHCR 43 | uses: docker/login-action@v3 44 | with: 45 | registry: ghcr.io 46 | username: ${{ github.repository_owner }} 47 | password: ${{ secrets.GITHUB_TOKEN }} 48 | 49 | - name: Build and push 50 | id: docker_build 51 | uses: docker/build-push-action@v6 52 | with: 53 | context: ./kubernetes 54 | file: ./kubernetes/Dockerfile 55 | push: ${{ github.event_name != 'pull_request' }} 56 | tags: ${{ steps.meta.outputs.tags }} 57 | labels: ${{ steps.meta.outputs.labels }} # step results live under `outputs` (plural) 58 | -------------------------------------------------------------------------------- /.github/workflows/shellcheck.yaml: -------------------------------------------------------------------------------- 1 | name: Shellcheck 2 | on: pull_request 3 | jobs: 4 | shellcheck: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout 8 | uses: actions/checkout@v4 9 | - name: Run shellcheck 10 | id: shellcheck 11 | uses: ludeeus/action-shellcheck@2.0.0 12 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Vault Agent for Raft Integrated Storage Backup (draft) 2 | 3 | The problem: [Snapshot automation](https://learn.hashicorp.com/vault/operations/storage-migration-checklist#summary), "No out-of-the-box automation tool" for [Raft storage snapshots](https://www.vaultproject.io/docs/commands/operator/raft) 4 | 5 | A suggested solution: The Vault Agent and the snapshot cronjob can be deployed on a remote backup server or on the Vault instances itself. 6 | 7 | ## Prerequisites 8 | 9 | The automation code (Ansible playbook and Terraform) does not automatically [install the Vault binary](https://learn.hashicorp.com/tutorials/vault/getting-started-install). 10 | 11 | ## Vault Policy 12 | 13 | Policy for the snapshot agent: 14 | ```bash 15 | echo ' 16 | path "sys/storage/raft/snapshot" { 17 | capabilities = ["read"] 18 | }' | vault policy write snapshot - 19 | ``` 20 | 21 | This policy is included in the [./terraform](./terraform) code. 22 | 23 | ## AppRole Authentication 24 | 25 | These manual steps for AppRole authentication are automated in the [./terraform](./terraform) code. 
26 | 27 | Enable AppRole and create the `vault-snap-agent` role: 28 | ```bash 29 | vault auth enable approle 30 | vault write auth/approle/role/vault-snap-agent token_ttl=2h token_policies=snapshot 31 | #vault read auth/approle/role/vault-snap-agent 32 | vault read auth/approle/role/vault-snap-agent/role-id -format=json | jq -r .data.role_id # sudo tee vault-host:/etc/vault.d/snap-roleid 33 | vault write -f auth/approle/role/vault-snap-agent/secret-id -format=json | jq -r .data.secret_id # sudo tee vault-host:/etc/vault.d/snap-secretid 34 | ``` 35 | 36 | On all Vault servers: 37 | ```bash 38 | echo "7581f63b-e36b-e105-0c6d-07c534c916c4" > /etc/vault.d/snap-roleid 39 | echo "91919667-7587-4a69-a4f9-766358b082ac" > /etc/vault.d/snap-secretid 40 | chmod 0640 /etc/vault.d/snap-{roleid,secretid} 41 | chown vault:vault /etc/vault.d/snap-{roleid,secretid} 42 | ``` 43 | 44 | ## Vault Proxy Configuration 45 | 46 | Configure the vault proxy for the snapshots: 47 | ```bash 48 | cat << EOF > /etc/vault.d/vault_snapshot_agent.hcl 49 | # Vault agent configuration for Raft snapshots 50 | 51 | vault { 52 | address = "https://$HOSTNAME:8200" 53 | } 54 | 55 | api_proxy { 56 | # Authenticate all requests automatically with the auto_auth token 57 | # https://developer.hashicorp.com/vault/docs/agent-and-proxy/proxy/apiproxy 58 | use_auto_auth_token = true 59 | } 60 | 61 | listener "unix" { 62 | # Expose Vault-API seperately 63 | # https://developer.hashicorp.com/vault/docs/agent/caching#configuration-listener 64 | address = "/etc/vault.d/agent.sock" 65 | tls_disable = true 66 | } 67 | 68 | auto_auth { 69 | method { 70 | # Authenticate with AppRole 71 | # https://www.vaultproject.io/docs/agent/autoauth/methods/approle 72 | type = "approle" 73 | 74 | config = { 75 | role_id_file_path = "/etc/vault.d/snap-roleid" 76 | secret_id_file_path = "/etc/vault.d/snap-secretid" 77 | remove_secret_id_file_after_reading = false 78 | } 79 | } 80 | } 81 | EOF 82 | ``` 83 | 84 | ## Vault Agent Systemd 
Service 85 | 86 | Configure the systemd service for the snapshot agent: 87 | ```bash 88 | cat << 'EOF' > /etc/systemd/system/vault-snap-agent.service 89 | [Unit] 90 | Description=Vault Snapshot Agent 91 | Requires=network-online.target 92 | After=network-online.target 93 | ConditionFileNotEmpty=/etc/vault.d/vault.hcl 94 | 95 | [Service] 96 | Restart=on-failure 97 | ExecStart=/usr/local/bin/vault proxy -config=/etc/vault.d/vault_snapshot_agent.hcl 98 | ExecReload=/bin/kill -HUP $MAINPID 99 | KillSignal=SIGINT 100 | User=vault 101 | Group=vault 102 | RuntimeDirectoryMode=0750 103 | RuntimeDirectory=vault-snap-agent 104 | 105 | [Install] 106 | WantedBy=multi-user.target 107 | EOF 108 | ``` 109 | 110 | Start the agent on all Vault servers: 111 | ```bash 112 | systemctl daemon-reload 113 | systemctl enable --now vault-snap-agent 114 | ``` 115 | 116 | ## Vault Raft Snapshot Cronjob 117 | 118 | Create a cronjob or a systemd service/timer unit (matter of preference). 119 | 120 | Create a script to execute the snapshot: 121 | ```bash 122 | cat << 'EOF' > /usr/local/bin/vault-snapshot 123 | #!/bin/sh 124 | # 125 | # Take Vault Raft integrated storage snapshots on the leader 126 | # See also: 127 | # - /etc/vault.d/vault_snapshot_agent.hcl 128 | # - /etc/systemd/system/vault-snap-agent.service 129 | 130 | VAULT_ADDR="unix:///etc/vault.d/agent.sock" \ 131 | /usr/local/bin/vault operator raft snapshot save "/opt/vault/snapshots/vault-raft_$(date +%F-%H%M).snapshot" 132 | EOF 133 | ``` 134 | 135 | Make the script executable: 136 | ```bash 137 | chmod +x /usr/local/bin/vault-snapshot 138 | ``` 139 | 140 | Take hourly snapshots with cron, make sure the cronjobs are evenly spaced out every hour (e.g. 
server1: Minute 0, server2: Minute 20, server3: Minute 40): 141 | ```bash 142 | echo "0 * * * * root /usr/local/bin/vault-snapshot" >> /etc/crontab 143 | ``` 144 | 145 | Test the script (errors probably in `/var/spool/mail/root`): 146 | ```bash 147 | vault-snapshot 148 | ``` 149 | 150 | ## Verify Backup 151 | 152 | List the backups: 153 | ```bash 154 | [root@vault1 ~]# ls -l /opt/vault/snapshots 155 | total 96 156 | -rw-r--r--. 1 root root 0 May 29 06:37 vault-raft_2020-05-29-0637.snapshot 157 | -rw-r--r--. 1 root root 21451 May 29 07:03 vault-raft_2020-05-29-0703.snapshot 158 | ``` 159 | 160 | ## Sync with remote storage 161 | ### S3 162 | 163 | Install s3cmd: https://github.com/s3tools/s3cmd/releases 164 | 165 | ```bash 166 | zypper install python3 167 | ln -s /usr/bin/python3 /usr/bin/python 168 | 169 | wget https://github.com/s3tools/s3cmd/releases/download/vx.x.x/s3cmd-x.x.x.tar.gz 170 | tar xvf s3cmd-x.x.x.tar.gz 171 | cd s3cmd-x.x.x 172 | python setup.py install 173 | ``` 174 | 175 | Configure s3cmd: 176 | ``` 177 | s3cmd --configure 178 | s3cmd mb s3://raft-snapshots 179 | ``` 180 | 181 | Add s3cmd sync to `vault-snapshot`: 182 | ```bash 183 | echo "/usr/bin/s3cmd sync /opt/vault/snapshots/* s3://raft-snapshots" >> /usr/local/bin/vault-snapshot 184 | ``` 185 | 186 | ## Retention 187 | 188 | For a retention of 7 days (locally, not on the remote storage) you need to add the following to the `vault-snapshot` script: 189 | ``` 190 | find /opt/vault/snapshots/* -mtime +7 -exec rm {} \; 191 | ``` 192 | 193 | To change the retention you can change the `+7` from the mtime parameter. 194 | -------------------------------------------------------------------------------- /ansible/Readme.md: -------------------------------------------------------------------------------- 1 | # Vault Raft Backup Agent - Ansible Configuration 2 | 3 | This directory contains Ansible instructions to deploy the Raft Backup Agent. The tasks of the role `vault-raft-backup-agent` are derived from the [description of the backup approach](../README.md). 
4 | 5 | ## Ansible Variables 6 | 7 | All variables of the role are documented in the file [`main.yml`](./roles/vault-raft-backup-agent/defaults/main.yml). 8 | 9 | The role `vault-raft-backup-agent` assumes that the roleid and secretid are defined. The backup agent and the backup job use these variables to authenticate against the Vault: 10 | 11 | * `vault_raft_bck_role_id`: The AppRole roleid 12 | * `vault_raft_bck_secret_id`: The AppRole secretid 13 | 14 | Note that the **secretid is removed by default**. Set the variable `remove_secret_id_file_after_reading: no` to alter this behavior. 15 | 16 | ## Usage 17 | 18 | ### Role Usage / Playbook 19 | An example playbook is provided in the file [`playbook.yml`](./playbook.yml) 20 | 21 | ```bash 22 | # run the playbook with a custom inventory (not included in this repo) 23 | $ ansible-playbook playbook.yml -i inventory 24 | ``` 25 | 26 | ### Check Snapshot Job Status 27 | 28 | ```bash 29 | $ systemctl list-timers 30 | ``` 31 | 32 | ## Limitations 33 | The Ansible role comes with the following limitations: 34 | 35 | * Does not configure a cron job, only a systemd timer/service pair 36 | * Does not automatically [install the Vault binary](https://learn.hashicorp.com/tutorials/vault/getting-started-install) 37 | -------------------------------------------------------------------------------- /ansible/playbook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - hosts: vault 4 | roles: 5 | - { role: vault-raft-backup-agent, tags: vault-raft-backup-agent } 6 | -------------------------------------------------------------------------------- /ansible/roles/vault-raft-backup-agent/defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | # Vault configuration directory 4 | vault_snapshot_config_dir: '/etc/vault.d' 5 | # Name for pid file 6 | vault_snapshot_pid_file_name: 'vault-raft-backup-agent.pid' 7 | # Location of pid 
file 8 | vault_snapshot_pid_dir: '{{ vault_snapshot_config_dir }}' 9 | # Systemd runtime directory 10 | vault_snapshot_run_dir: 'vault-raft-backup-agent' 11 | # Vault agent listener unix socket 12 | vault_snapshot_listener_socket: '{{ vault_snapshot_config_dir }}/agent.sock' 13 | 14 | # Vault API address 15 | vault_address: '127.0.0.1' 16 | # Vault API tls, choosing 'no' here will change the protocol 17 | # for vault_address from 'http' (default, for dev server) to 'https' 18 | vault_tls_disable: yes 19 | # Path on the local disk to a single PEM-encoded CA certificate to verify the Vault server's SSL certificate 20 | # only one of vault_ca_cert or vault_ca_path should be used 21 | vault_ca_cert: "" 22 | # Path on the local disk to a directory of PEM-encoded CA certificates to verify the Vault server's SSL certificate 23 | vault_ca_path: "" 24 | # Name to use as the SNI host when connecting via TLS 25 | vault_tls_server_name: "" 26 | # Disable verification of TLS certificates. Using this option is highly discouraged as it decreases the security of data transmissions to and from the Vault server 27 | vault_tls_skip_verify: no 28 | 29 | # Vault snapshot agent config file destination on remote host 30 | vault_snapshot_agent_config_file: '{{ vault_snapshot_config_dir }}/vault_snapshot_agent.hcl' 31 | # Vault snapshot agent config file template 32 | vault_snapshot_agent_config_file_template: 'templates{{ vault_snapshot_config_dir }}/vault_snapshot_agent.hcl.j2' 33 | 34 | # Systemd directory for service and timer files on remote host 35 | vault_snapshot_systemd_dir: '/etc/systemd/system' 36 | # Systemd service name for snapshot agent 37 | vault_snapshot_systemd_service_name: 'vault-raft-backup-agent.service' 38 | 39 | # Systemd timer name 40 | vault_snapshot_systemd_timer_name: 'vault-snap-agent.timer' 41 | # Systemd timer service name; this service performs the actual snapshotting 42 | vault_snapshot_systemd_timer_service_name: 'vault-snap-agent.service' 43 | # Systemd 
timer OnActiveSec, run 1s after activation 44 | vault_snapshot_systemd_timer_onactivesec: '1s' 45 | # Systemd timer OnUnitActiveSec, run monotonic timer every hour after activation 46 | vault_snapshot_systemd_timer_onunitactivesec: '1h' 47 | # Systemd timer description 48 | vault_snapshot_systemd_timer_description: 'Vault integrated storage snapshot' 49 | 50 | # Owner/Group for configuration directory and files 51 | vault_user: vault 52 | vault_group: vault 53 | 54 | # Path to vault binary 55 | vault_bin_path: '/usr/local/bin' 56 | 57 | # Variables for actual AppRole roleid and secretid. These variables can be defined 58 | # manually or generated with the terraform configuration from this repo. 59 | # Use Ansible Vault to encrypt the secretid. 60 | vault_raft_bck_role_id: '' 61 | vault_raft_bck_secret_id: 'sensitive' 62 | 63 | # path to roleid and secretid files for AppRole auth 64 | vault_snapshot_approle_roleid_file: '{{ vault_snapshot_config_dir }}/snap-roleid' 65 | vault_snapshot_approle_secretid_file: '{{ vault_snapshot_config_dir }}/snap-secretid' 66 | 67 | # This can be set to false to disable the default behavior 68 | # of removing the secret ID file after it's been read: 69 | # https://www.vaultproject.io/docs/agent/autoauth/methods/approle#remove_secret_id_file_after_reading 70 | remove_secret_id_file_after_reading: yes 71 | 72 | # Snapshot output directory 73 | vault_snapshot_dir: '/opt/vault/snapshots' 74 | # Snapshot file name format 75 | vault_snapshot_file_name: 'vault-raft_$(date +%%F-%%H%%M).snapshot' 76 | 77 | # Snapshot retention find time filter, 78 | # one of 'mtime' or 'atime', see `man find` 79 | vault_snapshot_retention_find_mode: 'mtime' 80 | # Snapshot retention, data was last modified/accessed 7*24 hours ago 81 | vault_snapshot_retention_time: '+7' 82 | # Action to take on expired files 83 | vault_snapshot_retention_find_action: 'rm' 84 | -------------------------------------------------------------------------------- 
/ansible/roles/vault-raft-backup-agent/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: daemon reload 4 | service: 5 | daemon_reload: yes -------------------------------------------------------------------------------- /ansible/roles/vault-raft-backup-agent/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: ensure Vault snapshot config dir 4 | file: 5 | path: '{{ vault_snapshot_config_dir }}' 6 | state: directory 7 | mode: '0700' 8 | owner: '{{ vault_user }}' 9 | group: '{{ vault_group }}' 10 | 11 | - name: write roleid 12 | copy: 13 | dest: '{{ vault_snapshot_approle_roleid_file }}' 14 | content: '{{ vault_raft_bck_role_id }}' 15 | mode: '0640' 16 | owner: '{{ vault_user }}' 17 | group: '{{ vault_group }}' 18 | 19 | - name: write secretid 20 | no_log: yes 21 | copy: 22 | dest: '{{ vault_snapshot_approle_secretid_file }}' 23 | content: '{{ vault_raft_bck_secret_id }}' 24 | mode: '0640' 25 | owner: '{{ vault_user }}' 26 | group: '{{ vault_group }}' 27 | 28 | - name: write snapshot agent configuration file 29 | template: 30 | src: '{{ vault_snapshot_agent_config_file_template }}' 31 | dest: '{{ vault_snapshot_agent_config_file }}' 32 | mode: '0640' 33 | owner: '{{ vault_user }}' 34 | group: '{{ vault_group }}' 35 | 36 | # Vault backup agent systemd config 37 | - name: create systemd service file for backup agent 38 | template: 39 | src: 'templates{{ vault_snapshot_systemd_dir }}/{{ vault_snapshot_systemd_service_name }}.j2' 40 | dest: '{{ vault_snapshot_systemd_dir }}/{{ vault_snapshot_systemd_service_name }}' 41 | notify: 42 | - daemon reload 43 | 44 | - name: start and enable systemd service 45 | service: 46 | name: '{{ vault_snapshot_systemd_service_name }}' 47 | enabled: yes 48 | state: started 49 | 50 | # Timer systemd config 51 | - name: create systemd timer service file for snapshot service 52 | template: 53 | src: 
'templates{{ vault_snapshot_systemd_dir }}/{{ vault_snapshot_systemd_timer_service_name }}.j2' 54 | dest: '{{ vault_snapshot_systemd_dir }}/{{ vault_snapshot_systemd_timer_service_name }}' 55 | notify: 56 | - daemon reload 57 | 58 | - name: create systemd timer file for snapshot service 59 | template: 60 | src: 'templates{{ vault_snapshot_systemd_dir }}/{{ vault_snapshot_systemd_timer_name }}.j2' 61 | dest: '{{ vault_snapshot_systemd_dir }}/{{ vault_snapshot_systemd_timer_name }}' 62 | notify: 63 | - daemon reload 64 | 65 | - name: ensure snapshot directory exists 66 | file: 67 | path: '{{ vault_snapshot_dir }}' 68 | state: directory 69 | mode: '0755' 70 | owner: '{{ vault_user }}' 71 | group: '{{ vault_group }}' 72 | 73 | - name: start systemd timer 74 | service: 75 | name: '{{ vault_snapshot_systemd_timer_name }}' 76 | state: started 77 | -------------------------------------------------------------------------------- /ansible/roles/vault-raft-backup-agent/templates/etc/systemd/system/vault-raft-backup-agent.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Vault Snapshot Agent 3 | Requires=network-online.target 4 | After=network-online.target 5 | 6 | [Service] 7 | Restart=on-failure 8 | ExecStart={{ vault_bin_path }}/vault proxy -config={{ vault_snapshot_agent_config_file }} 9 | ExecReload=/bin/kill -HUP $MAINPID 10 | KillSignal=SIGINT 11 | User={{ vault_user }} 12 | Group={{ vault_group }} 13 | RuntimeDirectoryMode=0750 14 | RuntimeDirectory={{ vault_snapshot_run_dir }} 15 | 16 | [Install] 17 | WantedBy=multi-user.target 18 | -------------------------------------------------------------------------------- /ansible/roles/vault-raft-backup-agent/templates/etc/systemd/system/vault-snap-agent.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description={{ vault_snapshot_systemd_timer_description }} 3 | 4 | [Service] 5 | Type=oneshot 6 | 
Environment=VAULT_ADDR=unix://{{ vault_snapshot_listener_socket }} 7 | ExecStart=/bin/sh -c '{{ vault_bin_path }}/vault operator raft snapshot save "{{ vault_snapshot_dir }}/{{ vault_snapshot_file_name }}"' 8 | ExecStartPost=/bin/sh -c 'find {{ vault_snapshot_dir }}/* -{{ vault_snapshot_retention_find_mode }} {{ vault_snapshot_retention_time }} -exec {{ vault_snapshot_retention_find_action }} {} \;' 9 | 10 | [Install] 11 | WantedBy=multi-user.target 12 | -------------------------------------------------------------------------------- /ansible/roles/vault-raft-backup-agent/templates/etc/systemd/system/vault-snap-agent.timer.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description={{ vault_snapshot_systemd_timer_description }} 3 | 4 | [Timer] 5 | Unit={{ vault_snapshot_systemd_timer_service_name }} 6 | 7 | OnActiveSec={{ vault_snapshot_systemd_timer_onactivesec }} 8 | OnUnitActiveSec={{ vault_snapshot_systemd_timer_onunitactivesec }} 9 | 10 | [Install] 11 | WantedBy=timers.target -------------------------------------------------------------------------------- /ansible/roles/vault-raft-backup-agent/templates/etc/vault.d/vault_snapshot_agent.hcl.j2: -------------------------------------------------------------------------------- 1 | # Vault agent configuration for Raft snapshots 2 | 3 | pid_file = "{{ vault_snapshot_pid_dir }}/{{ vault_snapshot_pid_file_name }}" 4 | 5 | vault { 6 | address = "{{ vault_tls_disable | ternary('http', 'https') }}://{{ vault_address }}:8200" 7 | {% if vault_ca_cert -%} 8 | ca_cert = "{{ vault_ca_cert }}" 9 | {% endif -%} 10 | {% if vault_ca_path -%} 11 | ca_path = "{{ vault_ca_path }}" 12 | {% endif -%} 13 | {% if vault_tls_server_name -%} 14 | tls_server_name = "{{ vault_tls_server_name }}" 15 | {% endif -%} 16 | tls_skip_verify = "{{ vault_tls_skip_verify | ternary('true', 'false') }}" 17 | } 18 | 19 | api_proxy { 20 | # Authenticate all requests automatically with the auto_auth token 
21 | # https://developer.hashicorp.com/vault/docs/agent-and-proxy/proxy/apiproxy 22 | use_auto_auth_token = true 23 | } 24 | 25 | listener "unix" { 26 | # Expose Vault-API seperately 27 | # https://developer.hashicorp.com/vault/docs/agent/caching#configuration-listener 28 | address = "{{ vault_snapshot_listener_socket }}" 29 | tls_disable = true 30 | } 31 | 32 | auto_auth { 33 | method { 34 | # Authenticate with AppRole 35 | # https://www.vaultproject.io/docs/agent/autoauth/methods/approle 36 | type = "approle" 37 | 38 | config = { 39 | role_id_file_path = "{{ vault_snapshot_approle_roleid_file }}" 40 | secret_id_file_path = "{{ vault_snapshot_approle_secretid_file }}" 41 | remove_secret_id_file_after_reading = {{ remove_secret_id_file_after_reading | bool | lower }} 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /kubernetes/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | ARG VAULT_VERSION=1.16.3 4 | 5 | COPY vault-snapshot.sh / 6 | 7 | RUN wget https://releases.hashicorp.com/vault/${VAULT_VERSION}/vault_${VAULT_VERSION}_linux_amd64.zip && \ 8 | unzip vault_${VAULT_VERSION}_linux_amd64.zip && \ 9 | mv vault /usr/local/bin && rm vault*zip && \ 10 | apk add s3cmd && chmod +x vault-snapshot.sh 11 | 12 | CMD ["/vault-snapshot.sh"] 13 | -------------------------------------------------------------------------------- /kubernetes/README.md: -------------------------------------------------------------------------------- 1 | # Cronjob for snapshotting Vault running on Kubernetes 2 | 3 | This assumes the Kubernetes authentication backend is configured in Vault. 4 | 5 | The script being executed in this cronjob, is authenticating with Vault using the Kubernetes authentication backend, via its serviceaccount JWT. 6 | 7 | The role and policy being used must be created before hand and must be used by the cronjob. 
8 | 9 | After the snapshot is created in a temporary directory, `s3cmd` is used to sync it to a s3 endpoint. 10 | 11 | ## Configuration over environment variables 12 | 13 | * `VAULT_ADDR` - Vault address to access 14 | * `VAULT_ROLE` - Vault role to use to create the snapshot 15 | * `USE_JWT_AUTH` - If set to true; the script will use JWT authentication else it will use kubernetes authentication 16 | * `S3_URI` - S3 URI to use to upload (s3://xxx) 17 | * `S3_BUCKET` - S3 bucket to point to 18 | * `S3_HOST` - S3 endpoint 19 | * `S3_EXPIRE_DAYS` - Delete files older than this threshold (expired) 20 | * `AWS_ACCESS_KEY_ID` - Access key to use to access S3 21 | * `AWS_SECRET_ACCESS_KEY` - Secret access key to use to access S3 22 | 23 | ## Configuration of file retention (pruning) 24 | 25 | With AWS S3, use [lifecycle 26 | rules](https://docs.aws.amazon.com/AmazonS3/latest/userguide/lifecycle-expire-general-considerations.html) 27 | to configure retention and automatic cleanup action (prune) for expired files. 
28 | 29 | For other S3 compatible storage, ensure to set [Governance 30 | lock](https://community.exoscale.com/documentation/storage/versioning/#set-up-the-lock-configuration-for-a-bucket) 31 | to avoid any modification before `$S3_EXPIRE_DAYS`: 32 | 33 | ``` 34 | mc retention set --default GOVERNANCE "${S3_EXPIRE_DAYS}d" my-s3-remote/my-bucket 35 | ``` 36 | 37 | On removal by the `vault-snapshot.sh` script, [`DEL` deletion marker 38 | (tombstone)](https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-lock-managing.html#object-lock-managing-delete-markers) 39 | is set: 40 | 41 | ``` 42 | mc ls --versions my-snapshots/vault-snapshots-2f848f 43 | [2024-09-09 09:07:46 CEST] 0B X/1031980658232456253 v2 DEL vault_2024-09-06-1739.snapshot 44 | [2024-09-06 19:39:49 CEST] 28KiB Standard 1031052557042383613 v1 PUT vault_2024-09-06-1739.snapshot 45 | ``` 46 | 47 | Use [`mc 48 | undo`](https://min.io/docs/minio/linux/reference/minio-mc/mc-undo.html) to undo 49 | the `DEL` operation: 50 | ``` 51 | mc undo my-snapshots/vault-snapshots-2f848f/vault_2024-09-06-1739.snapshot 52 | mc ls --versions my-snapshots/vault-snapshots-2f848f 53 | [2024-09-06 19:39:49 CEST] 28KiB Standard 1031052557042383613 v1 PUT vault_2024-09-06-1739.snapshot 54 | ``` 55 | -------------------------------------------------------------------------------- /kubernetes/cronjob.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: batch/v1 3 | kind: CronJob 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: vault-snapshot 7 | app.kubernetes.io/version: v0.1.0 8 | name: vault-snapshot 9 | spec: 10 | schedule: 0 4 * * * 11 | jobTemplate: 12 | metadata: 13 | labels: 14 | app.kubernetes.io/name: vault-snapshot 15 | app.kubernetes.io/version: v0.1.0 16 | spec: 17 | template: 18 | metadata: 19 | labels: 20 | app.kubernetes.io/name: vault-snapshot 21 | app.kubernetes.io/version: v0.1.0 22 | spec: 23 | restartPolicy: Never # case-sensitive; Jobs accept only Never or OnFailure 24 | 
automountServiceAccountToken: true 25 | serviceAccountName: vault-raft-snapshot 26 | containers: 27 | - name: vault-snapshot 28 | env: 29 | - name: S3_HOST 30 | value: s3.example.com 31 | - name: S3_BUCKET 32 | value: bucketname 33 | - name: S3_URI 34 | value: s3://bucketname 35 | # leave empty to retain snapshot files (default) 36 | - name: S3_EXPIRE_DAYS 37 | value: 38 | - name: VAULT_ROLE 39 | value: vault-snapshot 40 | - name: VAULT_ADDR 41 | value: https://vault.example.com 42 | - name: AWS_SECRET_ACCESS_KEY 43 | valueFrom: 44 | secretKeyRef: 45 | key: aws_secret_access_key 46 | name: vault-snapshot-credentials 47 | - name: AWS_ACCESS_KEY_ID 48 | valueFrom: 49 | secretKeyRef: 50 | key: aws_access_key_id 51 | name: vault-snapshot-credentials 52 | image: ghcr.io/adfinis/vault-snapshot:0.1.0 53 | volumeMounts: 54 | - name: snapshot-dir 55 | mountPath: /vault-snapshots # must match the directory vault-snapshot.sh writes to 56 | imagePullPolicy: IfNotPresent 57 | volumes: 58 | - name: snapshot-dir 59 | emptyDir: {} 60 | -------------------------------------------------------------------------------- /kubernetes/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: vault-raft-snapshot 6 | -------------------------------------------------------------------------------- /kubernetes/vault-snapshot.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | set -e 4 | 5 | 6 | # Authenticate with Vault 7 | JWT=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) 8 | export JWT 9 | if [ "${USE_JWT_AUTH}" = "true" ]; then 10 | echo "Using JWT Auth" 11 | VAULT_TOKEN=$(vault write -field=token auth/jwt/login role="${VAULT_ROLE:-my-role}" jwt="$JWT") # honour VAULT_ROLE; fall back to the former hard-coded role 12 | export VAULT_TOKEN 13 | else 14 | echo "Using Kubernetes Auth" 15 | VAULT_TOKEN=$(vault write -field=token auth/kubernetes/login role="${VAULT_ROLE}" jwt="${JWT}") 16 | export VAULT_TOKEN 17 | fi 18 | # create 
snapshot 19 | vault operator raft snapshot save /vault-snapshots/vault_"$(date +%F-%H%M)".snapshot 20 | 21 | # upload to s3 22 | s3cmd put /vault-snapshots/* "${S3_URI}" --host="${S3_HOST}" --host-bucket="${S3_BUCKET}" 23 | 24 | # remove expired snapshots 25 | if [ "${S3_EXPIRE_DAYS}" ]; then 26 | s3cmd ls "${S3_URI}" --host="${S3_HOST}" --host-bucket="${S3_BUCKET}" | while read -r line; do 27 | createDate=$(echo "$line" | awk '{print $1" "$2}') 28 | createDate=$(date -d"$createDate" +%s) 29 | olderThan=$(date --date @$(($(date +%s) - 86400*S3_EXPIRE_DAYS)) +%s) 30 | if [ "$createDate" -lt "$olderThan" ]; then 31 | fileName=$(echo "$line" | awk '{print $4}') 32 | if [ "$fileName" != "" ]; then 33 | s3cmd del "$fileName" --host="${S3_HOST}" --host-bucket="${S3_BUCKET}" 34 | fi 35 | fi 36 | done; 37 | fi -------------------------------------------------------------------------------- /terraform/Readme.md: -------------------------------------------------------------------------------- 1 | # Vault Raft Backup Agent - Terraform Configuration 2 | 3 | This directory contains Terraform instructions that prepare the Vault for usage with the Raft Backup Agent. The Terraform code: 4 | 1. Adds a snapshot policy 5 | 2. Configures AppRole authentication 6 | 3. Adds the AppRole role for snapshots with the snapshot policy from (1) 7 | 4. Retrieves roleid and secretid and updates the values in Ansible vars 8 | 9 | These steps are derived from the [description of the backup approach](../README.md). 
10 | 11 | ## Terraform Inputs: Ansible Variables 12 | 13 | The Terraform configuration assumes that the following Ansible var files should be automatically updated in step (4) above: 14 | 15 | | Description | Terraform Variable forming the Ansible Path | Variable Default Value | 16 | | --- | --- | --- | 17 | | The path of the roleid Ansible variable | `${ansible_play_dir}/vars/${ansible_vars_file_role_id}` | `../ansible/raft-backup-roleid.yml` | 18 | | The path of the secretid Ansible variable |`${ansible_play_dir}/vars/${ansible_vars_file_secret_id}` | `../ansible/raft-backup-secretid.yml` | 19 | | The location of the password file for Ansible Vault | `ansible_vault_id` | `../ansible/vault-pass` | 20 | 21 | Notes: 22 | 23 | * The secretid (sensitive) is encrypted with the [Ansible Vault](https://docs.ansible.com/ansible/latest/user_guide/vault.html) password defined in `ansible_vault_id`. 24 | * Any other variable in the file [`variables.tf`](./variables.tf) is used "internally", i.e., has no effect on the outputs of the module which could be processed by Ansible afterwards. 
25 | 26 | ## Usage 27 | 28 | ```bash 29 | # review the variables 30 | $ cat terraform.tfvars 31 | 32 | # configure access credentials, see also 33 | # https://www.terraform.io/docs/providers/vault/index.html 34 | $ export VAULT_ADDR=http://127.0.0.1:8200 35 | $ export VAULT_TOKEN=root 36 | 37 | # initialize and apply the Vault configuration 38 | $ terraform init 39 | $ terraform plan 40 | $ terraform apply 41 | ``` 42 | 43 | ## Terraform Outputs: AppRole `roleid` and `secretid` 44 | 45 | To reveal the `roleid` and (sensitive) `secretid` of the current configuration, use: 46 | ```bash 47 | $ terraform output approle_role_id 48 | $ terraform output approle_secret_id 49 | ``` 50 | 51 | ## Import Existing AppRole Configuration 52 | 53 | After initialization, existing AppRole configuration can be imported into the terraform state with: 54 | 55 | ```bash 56 | # Adjust $APPROLE_PATH to match the existing Vault configuration (remote). 57 | # Have a look at `vault auth list` to retrieve the path of an existing AppRole backend 58 | APPROLE_PATH=custom_approle 59 | terraform import vault_auth_backend.approle $APPROLE_PATH 60 | 61 | # Verify the import 62 | $ terraform state list 63 | vault_auth_backend.approle 64 | 65 | # Execute a plan to see the diff 66 | $ terraform plan -target=vault_auth_backend.approle 67 | # vault_auth_backend.approle will be updated in-place 68 | ~ resource "vault_auth_backend" "approle" { 69 | accessor = "auth_approle_e53052a8" 70 | default_lease_ttl_seconds = 0 71 | id = "custom_approle" 72 | local = false 73 | max_lease_ttl_seconds = 0 74 | path = "custom_approle" 75 | + tune = (known after apply) 76 | type = "approle" 77 | } 78 | 79 | # Copy/paste the diff to the `terraform.tf` file, 80 | # to replace the existing backend configuration 81 | # 82 | # ./terraform.tf excerpt 83 | # 84 | # AppRole backend 85 | resource "vault_auth_backend" "approle" { 86 | local = false 87 | path = "custom_approle" 88 | tune = [] 89 | type = "approle" 90 | } 91 | 92 | 
# Update the ./terraform.tf file until the plan on the resource 93 | # shows that the existing config file matches the current remote state 94 | terraform import vault_auth_backend.approle $APPROLE_PATH 95 | 96 | # Proceed similarly with any other resources for import, e.g., the AppRole role 97 | ROLE_PATH=auth/approle/role/existing-snapshot-approle-role 98 | terraform import vault_approle_auth_backend_role.example $ROLE_PATH 99 | ``` 100 | 101 | See also: https://www.terraform.io/docs/providers/vault/r/approle_auth_backend_role.html#import 102 | 103 | For importing other resources, have a look at the import instructions of the respective resources upstream: 104 | 105 | https://www.terraform.io/docs/providers/vault 106 |
--------------------------------------------------------------------------------
/terraform/outputs.tf:
--------------------------------------------------------------------------------
# role_id of the AppRole role created in terraform.tf.
output "approle_role_id" {
  description = "The role_id of the snapshot AppRole role"
  value       = vault_approle_auth_backend_role.role.role_id
}

# secret_id generated for the AppRole role. Only the `secret_id` attribute is
# exported (not the whole resource object), and it is marked sensitive so it
# is redacted in plan/apply output.
output "approle_secret_id" {
  description = "The secret_id generated for the snapshot AppRole role"
  value       = vault_approle_auth_backend_role_secret_id.secretid.secret_id
  sensitive   = true
}
--------------------------------------------------------------------------------
/terraform/terraform.tf:
--------------------------------------------------------------------------------
# The provider block is empty; the README configures access through the
# VAULT_ADDR and VAULT_TOKEN environment variables.
provider "vault" {}

# Policy for creating Raft snapshots
data "vault_policy_document" "raft" {
  rule {
    path         = "sys/storage/raft/snapshot"
    capabilities = ["read"]
    description  = "create raft storage snapshots"
  }
}

resource "vault_policy" "raft" {
  name   = var.snapshot_role_policy_name
  policy = data.vault_policy_document.raft.hcl
}

# AppRole backend
resource "vault_auth_backend" "approle" {
  type = "approle"
}

# AppRole backend role
resource "vault_approle_auth_backend_role" "role" {
  backend   = vault_auth_backend.approle.path
  role_name = var.approle_role_id
  # Reference the policy resource instead of the raw variable: the value is
  # the same name, but Terraform now creates the policy before the role.
  token_policies = [vault_policy.raft.name]
  token_ttl      = var.approle_role_token_ttl
}

# AppRole secretid
resource "vault_approle_auth_backend_role_secret_id" "secretid" {
  backend   = vault_auth_backend.approle.path
  role_name = vault_approle_auth_backend_role.role.role_name
}

# Update the AppRole roleid in the Ansible vars
resource "null_resource" "update_appid" {
  triggers = {
    # when the AppRole role changes
    key_id = vault_approle_auth_backend_role.role.id
  }

  provisioner "local-exec" {
    # Prepare directory for Ansible play variables
    # Play vars can be overridden with group or host vars, see also:
    # https://docs.ansible.com/ansible/latest/user_guide/playbooks_variables.html#variable-precedence-where-should-i-put-a-variable
    command = "mkdir -p '${var.ansible_variable_dir}'"
  }

  provisioner "local-exec" {
    # Write the new roleid to the Ansible vars file.
    # `>>` appends, so earlier entries in the file are kept.
    command = "echo \"${var.ansible_roleid_variable_name}: '${vault_approle_auth_backend_role.role.role_id}'\" >> \"${var.ansible_variable_dir}/${var.ansible_variable_file}\""
  }
}

# Update the AppRole secretid in the Ansible vars
resource "null_resource" "update_secretid" {
  triggers = {
    # when the secretid changes
    key_id = vault_approle_auth_backend_role_secret_id.secretid.id
  }

  provisioner "local-exec" {
    command = "mkdir -p '${var.ansible_variable_dir}'"
  }

  provisioner "local-exec" {
    # Encrypt the secretid and write to the Ansible vars file.
    # NOTE(review): the heredoc body was lost in extraction (only `<> "..."`
    # survived). The command below is reconstructed from the variables declared
    # for this step (ansible_vault_id, ansible_secretid_variable_name) and the
    # surviving `>>` redirect target -- verify against the upstream file.
    command = <<-EOT
      ansible-vault encrypt_string \
        --vault-id '${var.ansible_vault_id}' \
        '${vault_approle_auth_backend_role_secret_id.secretid.secret_id}' \
        --name '${var.ansible_secretid_variable_name}' \
        >> "${var.ansible_variable_dir}/${var.ansible_variable_file}"
    EOT
  }
}
--------------------------------------------------------------------------------
/terraform/terraform.tfvars:
--------------------------------------------------------------------------------
# Ansible Vault password file. The source can be a prompt, a file,
# or a script, depending on how you are storing your vault passwords:
# https://docs.ansible.com/ansible/latest/user_guide/vault.html
ansible_vault_id = "../ansible-vault/vault-pass"

# example: adding the vars to the role path
#ansible_variable_dir = "../ansible/roles/vault-raft-backup-agent/vars"

# example: adding the vars to a file 'main.yml' in folder '../ansible/group_vars/vault/'
#ansible_variable_dir = "../ansible/group_vars/vault/"

# example: adding the vars to a file 'vault.yml' in the folder '../ansible/group_vars/'
ansible_variable_dir  = "../ansible/group_vars/"
ansible_variable_file = "vault.yml"
--------------------------------------------------------------------------------
/terraform/variables.tf:
--------------------------------------------------------------------------------
# Name given to the Vault policy that allows reading the Raft snapshot endpoint.
variable "snapshot_role_policy_name" {
  type        = string
  description = "Name of the policy for the snapshot role"
  default     = "snapshot"
}

# Despite the "_id" suffix, this value is used as the AppRole `role_name`.
variable "approle_role_id" {
  type        = string
  description = "Name of the role for AppRole"
  default     = "vault-snap-agent"
}

# Declared as a number to match the numeric default and the "in seconds"
# description; string values such as "7200" are still converted by Terraform.
variable "approle_role_token_ttl" {
  type        = number
  description = "TTL in seconds for snapshot role tokens"
  default     = 7200
}

variable "ansible_vault_id" {
  type        = string
  description = "Location of the ansible-vault password file relative to this folder"
  default     = "../ansible/vault-pass"
}

# Directory created with `mkdir -p` before the Ansible vars file is written.
variable "ansible_variable_dir" {
  type        = string
  description = "The relative path to the Ansible variable directory"
  default     = "../ansible/roles/vault-raft-backup-agent/vars"
}

# File inside `ansible_variable_dir` that the roleid/secretid entries are appended to.
variable "ansible_variable_file" {
  type        = string
  description = "The name of the Ansible vars file"
  default     = "main.yml"
}

variable "ansible_roleid_variable_name" {
  type        = string
  description = "The variable name for the roleid"
  default     = "vault_raft_bck_role_id"
}

variable "ansible_secretid_variable_name" {
  type        = string
  description = "The variable name for the secretid encrypted with ansible-vault"
  default     = "vault_raft_bck_secret_id"
}
--------------------------------------------------------------------------------