├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── CI.yml │ ├── consul-version.yml │ ├── nomad-version.yml │ └── update-tutorial-module-version.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── acl_tokens.tf ├── bootstrap-acls ├── acl-policies │ └── admin.hcl ├── jobs.tf ├── policies.tf ├── tokens.tf └── vars.tf ├── bucket.tf ├── cloud-shell ├── print.txt └── steps.md ├── consul_tls_ca.tf ├── consul_tls_cli.tf ├── consul_tls_client.tf ├── consul_tls_server.tf ├── container_registry.tf ├── dashboards ├── grafana.tf ├── nomad-clients.json └── providers.tf ├── diagrams ├── readme.d2 └── readme.svg ├── dns.tf ├── example ├── jobs │ └── folding-at-home.hcl ├── main.tf └── outputs.tf ├── go.mod ├── go.sum ├── gossip_keys.tf ├── jobs ├── count-dashboard.hcl ├── db │ ├── cockroach.hcl │ └── timescale.hcl ├── ingress │ ├── services.hcl │ ├── traefik.hcl │ └── traefik_template.hcl ├── logs │ ├── loki.hcl │ └── promtail.hcl ├── metrics │ ├── README.md │ └── metrics.hcl └── terraform │ └── main.tf ├── load_balancer.tf ├── local_files.tf ├── modules ├── load-balancer │ ├── main.tf │ ├── outputs.tf │ └── vars.tf ├── network │ ├── README.md │ ├── network.tf │ ├── outputs.tf │ ├── router.tf │ └── vars.tf ├── open-port │ ├── README.md │ ├── firewall.tf │ └── vars.tf └── vm │ ├── README.md │ ├── outputs.tf │ ├── vars.tf │ └── vm.tf ├── mtls-terminating-proxy ├── README.md └── main.go ├── network.tf ├── nomad_tls_ca.tf ├── nomad_tls_cli.tf ├── nomad_tls_client.tf ├── nomad_tls_server.tf ├── outputs.tf ├── packer ├── Makefile ├── configs │ ├── consul │ │ ├── client.hcl │ │ ├── consul.service │ │ └── server.hcl │ └── nomad │ │ ├── client.hcl │ │ ├── docker_auth_config.json │ │ ├── nomad.service │ │ └── server.hcl ├── scripts │ ├── install_cni_plugins.sh │ ├── install_consul.sh │ ├── install_docker-credential-gcr.sh │ ├── install_docker.sh │ ├── install_falco.sh │ ├── install_gvisor.sh │ ├── install_hashicorp_apt.sh │ ├── install_nomad.sh │ ├── 
install_required_packages.sh │ └── install_stack_driver_agents.sh └── template.pkr.hcl ├── providers.tf ├── setup_gcp.sh ├── ssh-mtls-terminating-proxy.go ├── ssh.tf ├── templates ├── client.sh └── server.sh ├── vars.tf └── vms.tf /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: terraform 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "10:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | 2 | name: CI 3 | 4 | on: 5 | push: 6 | branches: [ master ] 7 | pull_request: 8 | branches: [ master ] 9 | schedule: 10 | - cron: "0 9 * * *" 11 | 12 | 13 | jobs: 14 | 15 | CI: 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Install Terraform 22 | run: | 23 | wget https://releases.hashicorp.com/terraform/1.3.7/terraform_1.3.7_linux_amd64.zip 24 | unzip terraform_1.3.7_linux_amd64.zip 25 | sudo mv ./terraform /usr/local/bin 26 | rm -rf terraform_1.3.7_linux_amd64.zip 27 | 28 | - name: Run Terraform Init 29 | run: terraform init -backend=false 30 | 31 | - name: Run Terraform Validate 32 | run: terraform validate . 
33 | -------------------------------------------------------------------------------- /.github/workflows/consul-version.yml: -------------------------------------------------------------------------------- 1 | name: Update Consul Version 2 | 3 | on: 4 | schedule: 5 | - cron: "0 8 * * *" 6 | 7 | jobs: 8 | update_consul_version: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@master 12 | with: 13 | persist-credentials: false 14 | fetch-depth: 0 15 | - name: Update Consul Version 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | run: | 19 | cd ./packer/scripts 20 | CURRENT=$(cat install_consul.sh | grep "CONSUL_VERSION=" | awk -F '=' '{print $2}') 21 | LATEST=$(curl --silent https://releases.hashicorp.com/index.json | jq -r '.consul.versions | keys | .[]' | grep -v "-" | grep -v "+" | sort --version-sort | tail -n 1) 22 | if [ "$CURRENT" != "$LATEST" ]; then 23 | echo "Consul version $CURRENT is out-of-date, updating to $LATEST" 24 | # configure git 25 | git config user.name "Consul Update Bot" 26 | git config user.email "picatz@users.noreply.github.com" 27 | git remote set-url origin "https://$GITHUB_ACTOR:$GITHUB_TOKEN@github.com/$GITHUB_REPOSITORY" 28 | # update install script 29 | sed -i "s/$CURRENT/$LATEST/g" install_consul.sh 30 | # update README 31 | cd - 32 | sed -i "s/$CURRENT/$LATEST/g" README.md 33 | # rest of git workflow 34 | git add . 
35 | git commit -m "consul-update-bot: update version from $CURRENT to $LATEST" || exit 1 36 | git status 37 | git push origin HEAD:master 38 | else 39 | echo "consul version $CURRENT is already up-to-date" 40 | fi -------------------------------------------------------------------------------- /.github/workflows/nomad-version.yml: -------------------------------------------------------------------------------- 1 | name: Update Nomad Version 2 | 3 | on: 4 | schedule: 5 | - cron: "0 9 * * *" 6 | 7 | jobs: 8 | update_nomad_version: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@master 12 | with: 13 | persist-credentials: false 14 | fetch-depth: 0 15 | - name: Update Nomad Version 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | run: | 19 | cd ./packer/scripts 20 | CURRENT=$(cat install_nomad.sh | grep "NOMAD_VERSION=" | awk -F '=' '{print $2}') 21 | LATEST=$(curl --silent https://releases.hashicorp.com/index.json | jq -r '.nomad.versions | keys | .[]' | grep -v "-" | grep -v "+" | sort --version-sort | tail -n 1) 22 | if [ "$CURRENT" != "$LATEST" ]; then 23 | echo "Nomad version $CURRENT is out-of-date, updating to $LATEST" 24 | 25 | # configure git 26 | git config user.name "Nomad Update Bot" 27 | git config user.email "picatz@users.noreply.github.com" 28 | git remote set-url origin "https://$GITHUB_ACTOR:$GITHUB_TOKEN@github.com/$GITHUB_REPOSITORY" 29 | # update install script 30 | sed -i "s/$CURRENT/$LATEST/g" install_nomad.sh 31 | # update README 32 | cd - 33 | sed -i "s/$CURRENT/$LATEST/g" README.md 34 | # rest of git workflow 35 | git add . 
36 | git commit -m "nomad-update-bot: update version from $CURRENT to $LATEST" || exit 1 37 | git status 38 | git push origin HEAD:master 39 | else 40 | echo "Nomad version $CURRENT is already up-to-date" 41 | fi -------------------------------------------------------------------------------- /.github/workflows/update-tutorial-module-version.yml: -------------------------------------------------------------------------------- 1 | 2 | name: Update Tutorial Module Version 3 | 4 | on: 5 | release: 6 | types: 7 | - published 8 | 9 | jobs: 10 | update: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Update Version 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | working-directory: cloud-shell 18 | run: | 19 | # small sleep buffer 20 | sleep 5 21 | # get previous and latest versions 22 | PREVIOUS_MODULE_VERSION=$(hub release -L 2 | tail -n 1 | awk -F 'v' '{print $2}') 23 | LATEST_MODULE_VERSION=$(hub release -L 1 | awk -F 'v' '{print $2}') 24 | # configure git 25 | git config user.name "Tutorial Update Bot" 26 | git config user.email "picatz@users.noreply.github.com" 27 | git remote set-url origin "https://$GITHUB_ACTOR:$GITHUB_TOKEN@github.com/$GITHUB_REPOSITORY" 28 | # update 29 | sed -i "s/$PREVIOUS_MODULE_VERSION/$LATEST_MODULE_VERSION/g" steps.md 30 | sed -i "s/$PREVIOUS_MODULE_VERSION/$LATEST_MODULE_VERSION/g" ../example/main.tf 31 | git add . 
32 | git commit -m "tutorial-update-bot: update module version from $PREVIOUS_MODULE_VERSION to $LATEST_MODULE_VERSION" || exit 1 33 | git status 34 | git push origin HEAD:master 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | account.json 2 | .terraform* 3 | *tfstate* 4 | *.pem 5 | *.csr 6 | bastion 7 | bastion.pub 8 | *.p12 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Kent 'picat' Gruber 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SERVERS ?= 3 2 | CLIENTS ?= 5 3 | CLIENT_MACHINE_TYPE ?= n1-standard-2 4 | SERVER_MACHINE_TYPE ?= n1-standard-1 5 | DNS_ENABLED ?= false 6 | PUBLIC_DOMAIN ?= "" 7 | GRAFANA_LOAD_BALANCER_ENABLED ?= false 8 | GRAFANA_PUBLIC_DOMAIN ?= "" 9 | PROMSCALE_ENABLED ?= false 10 | SSH_BASTION_ENABLED ?= false 11 | 12 | .PHONY: help 13 | help: ## Print this help menu 14 | help: 15 | @echo HashiCorp Nomad on GCP 16 | @echo 17 | @echo Required environment variables: 18 | @echo "* GOOGLE_PROJECT (${GOOGLE_PROJECT})" 19 | @echo "* GOOGLE_APPLICATION_CREDENTIALS (${GOOGLE_APPLICATION_CREDENTIALS})" 20 | @echo 21 | @echo 'Usage: make ' 22 | @echo 23 | @echo 'Targets:' 24 | @egrep '^(.+)\:\ ##\ (.+)' $(MAKEFILE_LIST) | column -t -c 2 -s ':#' 25 | 26 | .PHONY: packer/init 27 | packer/init: ## Initializes the Packer config 28 | @cd packer && packer init template.pkr.hcl 29 | 30 | .PHONY: packer/validate 31 | packer/validate: ## Validates the Packer config 32 | @cd packer && packer validate template.pkr.hcl 33 | 34 | .PHONY: packer/build 35 | packer/build: ## Forces a build with Packer 36 | @cd packer && time packer build \ 37 | -force \ 38 | -timestamp-ui \ 39 | template.pkr.hcl 40 | 41 | .PHONY: terraform/validate 42 | terraform/validate: ## Validates the Terraform config 43 | @terraform validate 44 | 45 | .PHONY: terraform/plan 46 | terraform/plan: ## Runs the Terraform plan command 47 | @terraform plan \ 48 | -var="project=${GOOGLE_PROJECT}" \ 49 | -var="bastion_enabled=$(SSH_BASTION_ENABLED)" \ 50 | -var="server_instances=$(SERVERS)" \ 51 | -var="client_instances=$(CLIENTS)" \ 52 | -var="client_machine_type=$(CLIENT_MACHINE_TYPE)" \ 53 | -var="server_machine_type=$(SERVER_MACHINE_TYPE)" \ 54 | -var="grafana_load_balancer_enabled=$(GRAFANA_LOAD_BALANCER_ENABLED)" \ 55 | 
-var="grafana_dns_managed_zone_dns_name=$(GRAFANA_PUBLIC_DOMAIN)" \ 56 | -var="dns_enabled=$(DNS_ENABLED)" \ 57 | -var="dns_managed_zone_dns_name=$(PUBLIC_DOMAIN)" \ 58 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 59 | 60 | .PHONY: terraform/wait 61 | terraform/wait: ## Waits for infra to be ready 62 | @echo "... waiting 30 seconds for all services to be ready before starting proxy ..." 63 | @sleep 30 64 | 65 | .PHONY: terraform/output 66 | terraform/output: ## Gets the Terraform output 67 | @terraform output -json 68 | 69 | .PHONY: terraform/up 70 | terraform/up: terraform/apply terraform/wait ssh/proxy/mtls ## Spins up infrastructure and local proxy 71 | 72 | .PHONY: terraform/apply 73 | terraform/apply: ## Runs and auto-apporves the Terraform apply command 74 | @terraform apply \ 75 | -auto-approve \ 76 | -var="project=${GOOGLE_PROJECT}" \ 77 | -var="bastion_enabled=$(SSH_BASTION_ENABLED)" \ 78 | -var="server_instances=$(SERVERS)" \ 79 | -var="client_instances=$(CLIENTS)" \ 80 | -var="client_machine_type=$(CLIENT_MACHINE_TYPE)" \ 81 | -var="server_machine_type=$(SERVER_MACHINE_TYPE)" \ 82 | -var="grafana_load_balancer_enabled=$(GRAFANA_LOAD_BALANCER_ENABLED)" \ 83 | -var="grafana_dns_managed_zone_dns_name=$(GRAFANA_PUBLIC_DOMAIN)" \ 84 | -var="dns_enabled=$(DNS_ENABLED)" \ 85 | -var="dns_managed_zone_dns_name=$(PUBLIC_DOMAIN)" \ 86 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 87 | 88 | .PHONY: terraform/shutdown 89 | terraform/shutdown: ## Turns off all VM instances 90 | @terraform apply \ 91 | -auto-approve \ 92 | -var="project=${GOOGLE_PROJECT}" \ 93 | -var="bastion_enabled=$(SSH_BASTION_ENABLED)" \ 94 | -var="server_instances=0" \ 95 | -var="client_instances=0" \ 96 | -var="client_machine_type=$(CLIENT_MACHINE_TYPE)" \ 97 | -var="server_machine_type=$(SERVER_MACHINE_TYPE)" \ 98 | -var="grafana_load_balancer_enabled=$(GRAFANA_LOAD_BALANCER_ENABLED)" \ 99 | -var="grafana_dns_managed_zone_dns_name=$(GRAFANA_PUBLIC_DOMAIN)" \ 100 | 
-var="dns_enabled=$(DNS_ENABLED)" \ 101 | -var="dns_managed_zone_dns_name=$(PUBLIC_DOMAIN)" \ 102 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 103 | 104 | .PHONY: terraform/restart 105 | terraform/restart: terraform/shutdown terraform/apply ## Shuts down all VM instances and restarts them 106 | 107 | .PHONY: terraform/destroy 108 | terraform/destroy: ## Runs and auto-apporves the Terraform destroy command 109 | @terraform destroy \ 110 | -auto-approve \ 111 | -var="project=${GOOGLE_PROJECT}" \ 112 | -var="bastion_enabled=$(SSH_BASTION_ENABLED)" \ 113 | -var="server_instances=$(SERVERS)" \ 114 | -var="client_instances=$(CLIENTS)" \ 115 | -var="client_machine_type=$(CLIENT_MACHINE_TYPE)" \ 116 | -var="server_machine_type=$(SERVER_MACHINE_TYPE)" \ 117 | -var="grafana_load_balancer_enabled=$(GRAFANA_LOAD_BALANCER_ENABLED)" \ 118 | -var="grafana_dns_managed_zone_dns_name=$(GRAFANA_PUBLIC_DOMAIN)" \ 119 | -var="dns_enabled=$(DNS_ENABLED)" \ 120 | -var="dns_managed_zone_dns_name=$(PUBLIC_DOMAIN)" \ 121 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 122 | 123 | .PHONY: terraform/validate/example 124 | terraform/validate/example: ## Validates the example Terraform config 125 | @cd example && terraform validate 126 | 127 | .PHONY: terraform/plan/example 128 | terraform/plan/example: ## Runs the Terraform plan command for the example config 129 | @cd example && terraform plan \ 130 | -var="project=${GOOGLE_PROJECT}" \ 131 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 132 | 133 | .PHONY: terraform/apply/example 134 | terraform/apply/example: ## Runs and auto-apporves the Terraform apply command for the example config 135 | @cd example && terraform apply \ 136 | -auto-approve \ 137 | -var="project=${GOOGLE_PROJECT}" \ 138 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 139 | 140 | .PHONY: terraform/destroy/example 141 | terraform/destroy/example: ## Runs and auto-apporves the Terraform destroy command for the example config 142 | @cd example && 
terraform destroy \ 143 | -auto-approve \ 144 | -var="project=${GOOGLE_PROJECT}" \ 145 | -var="credentials=${GOOGLE_APPLICATION_CREDENTIALS}" 146 | 147 | .PHONY: ssh/client 148 | ssh/client: ## Connects to the client instance using SSH 149 | @gcloud compute ssh client-0 --tunnel-through-iap 150 | 151 | .PHONY: ssh/server 152 | ssh/server: ## Connects to the server instance using SSH 153 | @gcloud compute ssh server-0 --tunnel-through-iap 154 | 155 | .PHONY: ssh/proxy/consul 156 | ssh/proxy/consul: ## Forwards the Consul server port to localhost 157 | @gcloud compute ssh server-0 --tunnel-through-iap -- -f -N -L 127.0.0.1:8500:127.0.0.1:8500 158 | 159 | .PHONY: ssh/proxy/nomad 160 | ssh/proxy/nomad: ## Forwards the Nomad server port to localhost 161 | @gcloud compute ssh server-0 --tunnel-through-iap -- -f -N -L 127.0.0.1:4646:127.0.0.1:4646 162 | 163 | .PHONY: ssh/proxy/mtls 164 | ssh/proxy/mtls: ## Forwards the Consul and Nomad server port to localhost, using the custom mTLS terminating proxy script 165 | @go run ssh-mtls-terminating-proxy.go 166 | 167 | .PHONY: ssh/proxy/count-dashboard 168 | ssh/proxy/count-dashboard: ## Forwards the example dashboard service port to localhost 169 | @gcloud compute ssh client-0 --tunnel-through-iap -- -f -N -L 127.0.0.1:9002:0.0.0.0:9002 170 | 171 | .PHONY: gcloud/delete-metadata 172 | gcloud/delete-metadata: ## Deletes all metadata entries from client VMs 173 | @gcloud compute instances list | grep "client-" | awk '{print $1 " " $2}' | xargs -n2 bash -c 'gcloud compute instances remove-metadata $1 --zone=$2 --all' bash 174 | 175 | .PHONY: consul/metrics/acls 176 | consul/metrics/acls: ## Create a Consul policy, role, and token to use with prometheus 177 | @echo "📑 Creating Consul ACL Policy" 178 | @consul acl policy create -name "resolve-any-upstream" -rules 'service_prefix "" { policy = "read" } node_prefix "" { policy = "read" } agent_prefix "" { policy = "read" }' -token=$(shell terraform output consul_master_token) 179 | 
@echo "🎭 Creating Consul ACL Role" 180 | @consul acl role create -name "metrics" -policy-name "resolve-any-upstream" -token=$(shell terraform output consul_master_token) 181 | @echo "🔑 Creating Consul ACL Token to Use for Prometheus Consul Service Discovery" 182 | @consul acl token create -role-name "metrics" -token=$(shell terraform output consul_master_token) 183 | 184 | .PHONY: nomad/metrics 185 | nomad/metrics: ## Runs a Prometheus and Grafana stack on Nomad 186 | @nomad run -var='consul_targets=[$(shell terraform output -json | jq -r '(.server_internal_ips.value + .client_internal_ips.value) | map(.+":8501") | @csv')]' -var="consul_acl_token=$(consul_acl_token)" -var="consul_lb_ip=$(shell terraform output load_balancer_ip)" jobs/metrics/metrics.hcl 187 | 188 | .PHONY: nomad/logs 189 | nomad/logs: ## Runs a Loki and Promtail jobs on Nomad 190 | @nomad run jobs/logs/loki.hcl 191 | @nomad run jobs/logs/promtail.hcl 192 | 193 | .PHONY: nomad/ingress 194 | nomad/ingress: ## Runs a Traefik proxy to handle ingress traffic across the cluster 195 | @nomad run jobs/ingress/traefik.hcl 196 | 197 | .PHONY: nomad/cockroachdb 198 | nomad/cockroachdb: ## Runs a Cockroach DB cluster 199 | @nomad run jobs/db/cockroach.hcl 200 | @sleep 10s 201 | @echo "initializing database" 202 | @nomad alloc exec -i -t=false -task cockroach $(shell nomad status cockroach | grep "running" | grep "cockroach-1" | head -n 1 | awk '{print $$1}') cockroach init --insecure --host=localhost:26258 203 | @sleep 10s 204 | @echo "listing nodes" 205 | @nomad alloc exec -i -t=false -task cockroach $(shell nomad status cockroach | grep "running" | grep "cockroach-1" | head -n 1 | awk '{print $$1}') cockroach node ls --insecure --host=localhost:26258 206 | 207 | .PHONY: nomad/cockroachdb/nodes 208 | nomad/cockroachdb/nodes: ## List all Cockroach DB nodes 209 | @nomad alloc exec -i -t=false -task cockroach $(shell nomad status cockroach | grep "running" | grep "cockroach-1" | head -n 1 | awk '{print $$1}') 
cockroach node ls --insecure --host=localhost:26258 210 | 211 | .PHONY: nomad/cockroachdb/sql 212 | nomad/cockroachdb/sql: ## Start an interactive Cockroach DB SQL shell 213 | @nomad alloc exec -i -t=true -task cockroach $(shell nomad status cockroach | grep "running" | grep "cockroach-1" | head -n 1 | awk '{print $$1}') cockroach sql --insecure --host=localhost:26258 214 | 215 | .PHONY: nomad/bootstrap 216 | nomad/bootstrap: ## Bootstraps the ACL system on the Nomad cluster 217 | @nomad acl bootstrap 218 | 219 | .PHONY: mtls/init/macos/keychain 220 | mtls/init/macos/keychain: ## Create a new macOS keychain for Nomad 221 | @security create-keychain -P nomad 222 | 223 | .PHONY: mtls/install/macos/keychain 224 | mtls/install/macos/keychain: ## Install generated CA and client certificate in the macOS keychain 225 | @openssl pkcs12 -export -in nomad-cli-cert.pem -inkey nomad-cli-key.pem -out nomad-cli.p12 -CAfile nomad-ca.pem -name "Nomad CLI" 226 | @security import nomad-cli.p12 -k $(shell realpath ~/Library/Keychains/nomad-db) 227 | @sudo security add-trusted-cert -d -r trustRoot -k "/Library/Keychains/System.keychain" nomad-ca.pem 228 | 229 | .PHONY: mtls/proxy/nomad 230 | mtls/proxy/nomad: # Start mTLS local proxy for Nomad using github.com/picatz/mtls-proxy 231 | @mtls-proxy -listener-addr="127.0.0.1:4646" -target-addr="$(shell terraform output -raw load_balancer_ip):4646" -ca-file="nomad-ca.pem" -cert-file="nomad-cli-cert.pem" -key-file="nomad-cli-key.pem" -verify-dns-name="server.global.nomad" 232 | 233 | .PHONY: mtls/proxy/consul 234 | mtls/proxy/consul: # Start mTLS local proxy for Consul using github.com/picatz/mtls-proxy 235 | @mtls-proxy -listener-addr="127.0.0.1:8500" -target-addr="$(shell terraform output -raw load_balancer_ip):8501" -ca-file="consul-ca.pem" -cert-file="consul-cli-cert.pem" -key-file="consul-cli-key.pem" -verify-dns-name="server.dc1.consul" 236 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Nomad Cluster 2 | 3 | [![Nomad Version](https://img.shields.io/badge/Nomad%20Version-1.6.1-00bc7f.svg)](https://www.nomadproject.io/downloads) [![Consul Version](https://img.shields.io/badge/Consul%20Version-1.16.1-ca2171.svg)](https://www.consul.io/downloads) 4 | 5 | [Terraform](https://www.terraform.io/) Module for [Nomad](https://nomadproject.io/) clusters with [Consul](https://www.consul.io/) on [GCP](https://cloud.google.com/). 6 | 7 | ## Module Features 8 | 9 | * Includes HashiCorp's [Consul](https://www.consul.io/) service mesh 10 | * Gossip encryption, mTLS, and ACLs enabled for Nomad and Consul 11 | * Optional load balancer and DNS configuration 12 | * Optional SSH bastion host 13 | * Only the [Docker task driver](https://www.nomadproject.io/docs/drivers/docker) is enabled 14 | * Installs the [gVisor](https://gvisor.dev/) container runtime (`runsc`) 15 | * Installs the [Falco](https://falco.org/) runtime security monitor 16 | 17 | ## Cloud Shell Interactive Tutorial 18 | 19 | For a full interactive tutorial to get started using this module: 20 | 21 | [![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://ssh.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2Fpicatz%2Fterraform-google-nomad&cloudshell_print=cloud-shell%2Fprint.txt&cloudshell_tutorial=cloud-shell%2Fsteps.md&shellonly=true) 22 | 23 | ## Infrastructure Diagram 24 | 25 |

26 | Infrastructure Diagram 27 |

28 | 29 | ## Logs 30 | 31 | Logs are centralized using GCP's [Cloud Logging](https://cloud.google.com/logging). You can use the following filter to see all Nomad agent logs: 32 | 33 | ```console 34 | $ gcloud logging read 'resource.type="gce_instance" jsonPayload.ident="nomad"' 35 | ... 36 | ``` 37 | 38 | ```console 39 | $ gcloud logging read 'resource.type="gce_instance" jsonPayload.ident="nomad" jsonPayload.host="server-0"' --format=json | jq -r '.[] | .jsonPayload.message' | less 40 | ... 41 | ``` 42 | 43 | Logs can also be collected within the cluster using Promtail and Loki, then visualized using Grafana (optionally exposed using a public load balancer and DNS name). 44 | 45 | ```console 46 | $ DNS_ENABLED=true PUBLIC_DOMAIN="nomad.your-domain.com" make terraform/apply 47 | ... 48 | $ export CONSUL_HTTP_TOKEN=$(terraform output -json | jq -r .consul_master_token.value) 49 | $ make consul/metrics/acls 50 | ... 51 | 🔑 Creating Consul ACL Token to Use for Prometheus Consul Service Discovery 52 | AccessorID: 15b9a51d-7af4-e8d4-7c09-312c594a5907 53 | SecretID: 2a1c7926-b6e3-566e-ddf5-b19279fa134e 54 | Description: 55 | Local: false 56 | Create Time: 2021-04-11 16:16:03.90231.6.1 +0000 UTC 57 | Roles: 58 | 6ae941.6.1c07-49a7-fa95-8ce14aa8a75e - metrics 59 | 60 | $ consul_acl_token=2a1c7926-b6e3-566e-ddf5-b19279fa134e make nomad/metrics 61 | $ make nomad/logs 62 | $ make nomad/ingress 63 | $ GRAFANA_PUBLIC_DOMAIN="grafana.your-domain.com" GRAFANA_LOAD_BALANCER_ENABLED=true DNS_ENABLED=true PUBLIC_DOMAIN="nomad.your-domain.com" make terraform/apply 64 | $ open http://public.grafana.your-domain.com:3000/login 65 | ``` 66 | 67 | ## Bootstrap ACL Token 68 | 69 | If the cluster is started with ACLs enabled, which is the default behavior of this module, you may see this: 70 | 71 | ```console 72 | $ export NOMAD_ADDR="https://$(terraform output -json | jq -r .load_balancer_ip.value):4646" 73 | $ nomad status 74 | Error querying jobs: Unexpected response code: 403 
(Permission denied) 75 | ``` 76 | 77 | We can bootstrap ACLs to get the bootstrap management token like so: 78 | 79 | ```console 80 | $ nomad acl bootstrap 81 | Accessor ID = a1495889-37ce-6784-78f3-31.6.1984bca 82 | Secret ID = dc8c0349-c1fd-dc2c-299c-d513e5dd6df2 83 | Name = Bootstrap Token 84 | Type = management 85 | Global = true 86 | Policies = n/a 87 | Create Time = 2020-04-27 05:24:43.734587566 +0000 UTC 88 | Create Index = 7 89 | Modify Index = 7 90 | ``` 91 | 92 | Then we can use that token (Secret ID) to perform the rest of the ACL bootstrapping process: 93 | 94 | ```console 95 | $ export NOMAD_TOKEN="dc8c0349-c1fd-dc2c-299c-d513e5dd6df2" 96 | $ nomad status 97 | No running jobs 98 | $ ... 99 | ``` 100 | 101 | ## Use `ssh-mtls-terminating-proxy` to access the Nomad UI 102 | 103 | When using the SSH bastion, you can use the `ssh-mtls-terminating-proxy.go` helper script to tunnel a connection from localhost to the Nomad server API: 104 | 105 | ```console 106 | $ make ssh/proxy/mtls 107 | 2021/04/11.16.18:28 getting terraform output 108 | 2021/04/11.16.18:29 Bastion IP: "34.73.106.60" 109 | 2021/04/11.16.18:29 Server IP: "1.6.168.2.8" 110 | 2021/04/11.16.18:29 Setting up SSH agent 111 | 2021/04/11.16.18:29 connecting to the bastion 112 | 2021/04/11.16.18:29 connecting to the server through the bastion 113 | 2021/04/11.16.18:30 wrapping the server connection with SSH through the bastion 114 | 2021/04/11.16.18:30 tunneling a new connection for Consul to the server with SSH through the bastion 115 | 2021/04/11.16.18:30 loading Consul TLS data 116 | 2021/04/11.16.18:30 tunneling a new connection for somad to the server with ssh through the bastion 117 | 2021/04/11.16.18:30 loading Nomad TLS data 118 | 2021/04/11.16.18:30 starting Consul local listener on localhost:8500 119 | 2021/04/11.16.18:30 starting Nomad local listener on localhost:4646 120 | ... 121 | ``` 122 | 123 | Then open your browser at `http://localhost:4646/ui/` to securely access the Nomad UI. 
124 | -------------------------------------------------------------------------------- /acl_tokens.tf: -------------------------------------------------------------------------------- 1 | resource "random_uuid" "consul_master_token" {} -------------------------------------------------------------------------------- /bootstrap-acls/acl-policies/admin.hcl: -------------------------------------------------------------------------------- 1 | namespace "default" { 2 | policy = "write" 3 | } 4 | 5 | agent { 6 | policy = "write" 7 | } 8 | 9 | node { 10 | policy = "write" 11 | } 12 | 13 | operator { 14 | policy = "write" 15 | } 16 | 17 | quota { 18 | policy = "write" 19 | } 20 | 21 | host_volume { 22 | policy = "write" 23 | } 24 | -------------------------------------------------------------------------------- /bootstrap-acls/jobs.tf: -------------------------------------------------------------------------------- 1 | provider "nomad" { 2 | address = format("https://%s:4646", module.load_balancer.external_ip) 3 | ca_file = local_file.ca_file.filename 4 | cert_file = local_file.cli_cert.filename 5 | key_file = local_file.cli_key.filename 6 | secret_id = var.nomad_token 7 | } 8 | -------------------------------------------------------------------------------- /bootstrap-acls/policies.tf: -------------------------------------------------------------------------------- 1 | resource "nomad_acl_policy" "admin" { 2 | name = "admin" 3 | description = "Policy for Nomad admins." 
4 | rules_hcl = file("${path.module}/acl-policies/admin.hcl") 5 | } 6 | -------------------------------------------------------------------------------- /bootstrap-acls/tokens.tf: -------------------------------------------------------------------------------- 1 | resource "nomad_acl_token" "admin" { 2 | type = "client" 3 | name = "admin" 4 | policies = [nomad_acl_policy.admin.name] 5 | } -------------------------------------------------------------------------------- /bootstrap-acls/vars.tf: -------------------------------------------------------------------------------- 1 | variable "nomad_token" { 2 | type = string 3 | description = "Nomad token to use for administration." 4 | } 5 | 6 | variable "ca_file" { 7 | type = string 8 | default = "../nomad-ca.pem" 9 | } 10 | 11 | variable "cli_cert" { 12 | type = string 13 | default = "../nomad-cli-cert.pem" 14 | } 15 | 16 | variable "cli_key" { 17 | type = string 18 | default = "../nomad-cli-key.pem" 19 | } -------------------------------------------------------------------------------- /bucket.tf: -------------------------------------------------------------------------------- 1 | resource "google_storage_bucket" "containers" { 2 | name = format("%s-containers", var.project) 3 | location = var.bucket_location 4 | force_destroy = true 5 | } -------------------------------------------------------------------------------- /cloud-shell/print.txt: -------------------------------------------------------------------------------- 1 | HashiCorp Nomad on GCP -------------------------------------------------------------------------------- /cloud-shell/steps.md: -------------------------------------------------------------------------------- 1 | # Deploy a Nomad Cluster to GCP 2 | 3 | ## Welcome! 4 | 5 | 👩🏽‍💻This tutorial will teach you how to deploy [Nomad](https://www.nomadproject.io/) clusters to the Google Cloud Platform using [Packer](https://www.packer.io/) and [Terraform](https://www.terraform.io/)! 
6 | 7 | **Includes**: 8 | 9 | 1. 🛠 Setting up your cloud shell environment with `nomad`, `packer`, and `terraform` binaries. 10 | 2. ⚙️ Configuring a new GCP project, linking the billing account, and enabling the compute engine API using `gcloud`. 11 | 3. 📦 Packaging cluster golden images (bastion, server, and client) with `packer`. 12 | 4. ☁️ Deploying a Nomad cluster using `terraform`. 13 | 5. 🔐 Bootstrapping the [ACL system](https://learn.hashicorp.com/nomad/acls/fundamentals), obtaining a administrative management token. 14 | 6. 🐳 Submitting an example job as a Docker container running [Folding at Home](https://foldingathome.org/) to the cluster, tailing the logs, and then stopping the container. 15 | 16 | ## Setup Environment 17 | 18 | Before we can deploy our cluster, we need to setup our environment with the required HashiCorp tools. 19 | 20 | ### Download Nomad 21 | 22 | Download the latest version of [Nomad](https://www.nomadproject.io/) from HashiCorp's website by copying and pasting this snippet in the terminal: 23 | 24 | ```console 25 | curl "https://releases.hashicorp.com/nomad/0.12.0/nomad_0.12.0_linux_amd64.zip" -o nomad.zip 26 | unzip nomad.zip 27 | sudo mv nomad /usr/local/bin 28 | nomad --version 29 | ``` 30 | 31 | ### Download Packer 32 | 33 | Download the latest version of [Packer](https://www.packer.io/) from HashiCorp's website by copying and pasting this snippet in the terminal: 34 | 35 | ```console 36 | curl "https://releases.hashicorp.com/packer/1.6.0/packer_1.6.0_linux_amd64.zip" -o packer.zip 37 | unzip packer.zip 38 | sudo mv packer /usr/local/bin 39 | packer --version 40 | ``` 41 | 42 | ### Download Terraform 43 | 44 | Download the latest version of [Terraform](https://www.terraform.io/) from HashiCorp's website by copying and pasting this snippet in the terminal: 45 | 46 | ```console 47 | curl "https://releases.hashicorp.com/terraform/0.12.28/terraform_0.12.28_linux_amd64.zip" -o terraform.zip 48 | unzip terraform.zip 49 | sudo mv 
terraform /usr/local/bin 50 | terraform --version 51 | ``` 52 | 53 | 🎉 You have now installed `nomad`, `packer`, and `terraform`! 54 | 55 | ### Next Step 56 | 57 | Now that we have our tools, let's configure our GCP project. 58 | 59 | ## Configure GCP Project 60 | 61 | Before building our infrastructure, we'll need to do a few things: 62 | 63 | 1. Create a new GCP project. 64 | 2. Link a billing account to that project. 65 | 3. Enable the [compute engine](https://cloud.google.com/compute). 66 | 4. Create a Terraform Service Account, with a credentials file (`account.json`). 67 | 5. Set the required environment variables to move onto the next steps. 68 | 69 | ### Create a New Project 70 | 71 | To get started, let's create a new GCP project with the following command: 72 | 73 | ```console 74 | gcloud projects create your-new-project-name 75 | ``` 76 | 77 | Now export the project name as the `GOOGLE_PROJECT` environment variable: 78 | 79 | ```console 80 | export GOOGLE_PROJECT="your-new-project-name" 81 | ``` 82 | 83 | And then set your `gcloud` config to use that project: 84 | 85 | ```console 86 | gcloud config set project $GOOGLE_PROJECT 87 | ``` 88 | 89 | ### Link Billing Account to Project 90 | 91 | Next, let's link a billing account to that project. 
To determine what billing accounts are available, run the following command: 92 | 93 | ```console 94 | gcloud alpha billing accounts list 95 | ``` 96 | 97 | Then set the billing account ID `GOOGLE_BILLING_ACCOUNT` environment variable: 98 | 99 | ```console 100 | export GOOGLE_BILLING_ACCOUNT="XXXXXXX" 101 | ``` 102 | 103 | So we can link the `GOOGLE_BILLING_ACCOUNT` with the previously created `GOOGLE_PROJECT`: 104 | 105 | ```console 106 | gcloud alpha billing projects link "$GOOGLE_PROJECT" --billing-account "$GOOGLE_BILLING_ACCOUNT" 107 | ``` 108 | 109 | ### Enable Compute API 110 | 111 | In order to deploy VMs to the project, we need to enable the compute API: 112 | 113 | ```console 114 | gcloud services enable compute.googleapis.com 115 | ``` 116 | 117 | > ℹ️ **Note** 118 | > 119 | > The command will take a few minutes to complete. 120 | 121 | ### Create Terraform Service Account 122 | 123 | Finally, let's create a Terraform Service Account user and its `account.json` credentials file: 124 | 125 | ```console 126 | gcloud iam service-accounts create terraform \ 127 | --display-name "Terraform Service Account" \ 128 | --description "Service account to use with Terraform" 129 | ``` 130 | 131 | ```console 132 | gcloud projects add-iam-policy-binding "$GOOGLE_PROJECT" \ 133 | --member serviceAccount:"terraform@$GOOGLE_PROJECT.iam.gserviceaccount.com" \ 134 | --role roles/editor 135 | ``` 136 | 137 | ```console 138 | gcloud iam service-accounts keys create account.json \ 139 | --iam-account "terraform@$GOOGLE_PROJECT.iam.gserviceaccount.com" 140 | ``` 141 | 142 | > ⚠️ **Warning** 143 | > 144 | > The `account.json` credentials file gives privileged access to this GCP project. Be sure to prevent accidentally leaking these credentials in version control systems such as `git`. In general, storing them on your own host machine, or in your own GCP cloud shell, is ok.
However, using a secrets management system like HashiCorp [Vault](https://www.vaultproject.io/) can often be a better solution for teams. For this tutorial's purposes, we'll be storing the `account.json` credentials on disk in the cloud shell. 145 | 146 | Now set the *full path* of the newly created `account.json` file as `GOOGLE_APPLICATION_CREDENTIALS` environment variable. 147 | 148 | ```console 149 | export GOOGLE_APPLICATION_CREDENTIALS=$(realpath account.json) 150 | ``` 151 | 152 | ### Ensure Required Environment Variables Are Set 153 | 154 | Before moving onto the next steps, ensure the following environment variables are set: 155 | 156 | * `GOOGLE_PROJECT` with your selected GCP project name. 157 | * `GOOGLE_APPLICATION_CREDENTIALS` with the *full path* to the Terraform Service Account `account.json` credentials file created with the last step. 158 | 159 | ## Build Images with Packer 160 | 161 | To build the cluster images, change into the `packer` directory: 162 | 163 | ```console 164 | cd packer 165 | ``` 166 | 167 | And then run the following command which will use the `template.json` file to build the bastion, server, and client images in parallel. 168 | 169 | ```console 170 | packer build -force template.json 171 | ``` 172 | 173 | > ℹ️ **Note** 174 | > 175 | > The command will take about 5 minutes to complete. 176 | 177 | Once the command completes successfully, change back to previous folder to move onto the next phase: 178 | 179 | ```console 180 | cd .. 181 | ``` 182 | 183 | ## Deploy Infrastructure with Terraform 184 | 185 | 🙌🏽 Now to finally deploy the Nomad cluster using Terraform! 
186 | 187 | ### Example Configuration 188 | 189 | The `example` directory contains an simple Terraform configuration using the [`picatz/google/nomad`](https://registry.terraform.io/modules/picatz/nomad/google) module: 190 | 191 | > ℹ️ **Terraform Configuration** 192 | > 193 | > The `example/main.tf` file contains: 194 | > 195 | > ```hcl 196 | > variable "project" { 197 | > description = "The GCP project name to deploy the cluster to." 198 | > } 199 | > 200 | > variable "credentials" { 201 | > description = "The GCP credentials file path to use, preferably a Terraform Service Account." 202 | > } 203 | > 204 | > module "nomad" { 205 | > source = "picatz/nomad/google" 206 | > version = "2.7.8" 207 | > project = var.project 208 | > credentials = var.credentials 209 | > bastion_enabled = false 210 | > server_instances = 1 211 | > client_instances = 1 212 | > } 213 | > ``` 214 | 215 | The configuration disables the default SSH bastion to the cluster, as well as running only one server and client node to save costs. 216 | 217 | ### Initialize Terraform 218 | 219 | Run the following command to change into the `example` directory: 220 | 221 | ```console 222 | cd example 223 | ``` 224 | 225 | Then initialize Terraform which will download the module from the Terraform Registry: 226 | 227 | ```console 228 | terraform init 229 | ``` 230 | 231 | ### Plan Changes 232 | 233 | To plan our infrastructure changes, use `terraform plan`: 234 | 235 | ```console 236 | terraform plan -var="project=$GOOGLE_PROJECT" -var="credentials=$GOOGLE_APPLICATION_CREDENTIALS" 237 | ``` 238 | 239 | ### Apply Changes 240 | 241 | To apply the changes, actually creating the cluster: 242 | 243 | ```console 244 | terraform apply -auto-approve -var="project=$GOOGLE_PROJECT" -var="credentials=$GOOGLE_APPLICATION_CREDENTIALS" 245 | ``` 246 | 247 | > ℹ️ **Note** 248 | > 249 | > The command will take about 5 minutes to complete. 
250 | 251 | ### Set Environment Variables 252 | 253 | Using the Terraform outputs, we can set the required Nomad environment variables to securely access the Nomad cluster API, using the TLS information and load balancer created in the previous step: 254 | 255 | ```console 256 | export NOMAD_ADDR="https://$(terraform output -json | jq -r .load_balancer_ip.value):4646" 257 | export NOMAD_CACERT="$(realpath nomad-ca.pem)" 258 | export NOMAD_CLIENT_CERT="$(realpath nomad-cli-cert.pem)" 259 | export NOMAD_CLIENT_KEY="$(realpath nomad-cli-key.pem)" 260 | ``` 261 | 262 | ## Bootstrap ACL System 263 | 264 | To create an administrative management token (only meant to be used by Nomad Administrators), run the following command: 265 | 266 | ```console 267 | nomad acl bootstrap 268 | ``` 269 | 270 | > ℹ️ **Note** 271 | > 272 | > If the command above errors due to an i/o timeout, try rerunning the command again. This will happen when attempting to `bootstrap` a cluster that hasn't started yet. This should only take a few minutes at the most. 273 | 274 | Then we can use the token (Secret ID) in the previous command's output to access the cluster by setting the `NOMAD_TOKEN` environment variable: 275 | 276 | ```console 277 | export NOMAD_TOKEN="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" 278 | ``` 279 | 280 | To verify access to the Nomad API, run the following command: 281 | 282 | ```console 283 | nomad status 284 | ``` 285 | 286 | Which should output: 287 | 288 | ```plaintext 289 | No running jobs 290 | ``` 291 | 292 | > ℹ️ **Learn How To Create Custom ACLs** 293 | > 294 | > Now that you have a management token, you can [learn ACL system fundamentals](https://learn.hashicorp.com/nomad/acls/fundamentals) to tune the ACL system for your cluster's needs.
295 | 296 | ### Run a Docker Container 297 | 298 | Now that we deployed the cluster, let's use it to submit an example job using a Docker container to run [Folding at Home](https://foldingathome.org/): 299 | 300 | ```hcl 301 | job "folding-at-home" { 302 | datacenters = ["dc1"] 303 | group "folding-at-home" { 304 | task "folding-at-home" { 305 | driver = "docker" 306 | config { 307 | image = "kentgruber/fah-client:latest" 308 | } 309 | } 310 | } 311 | } 312 | ``` 313 | 314 | > ℹ️ **Note** 315 | > 316 | > There are many other [task drivers](https://www.nomadproject.io/docs/drivers) available for Nomad, but the `picatz/google/nomad` module is setup to support just the [Docker Driver](https://www.nomadproject.io/docs/drivers/docker) by default. 317 | > 318 | 319 | To submit the job to the cluster, run the following command using the `jobs/folding-at-home.hcl` job file: 320 | 321 | ```console 322 | nomad run jobs/folding-at-home.hcl 323 | ``` 324 | 325 | Command output will look *something* like this: 326 | 327 | ```plaintext 328 | ==> Monitoring evaluation "c01bbaa9" 329 | Evaluation triggered by job "folding-at-home" 330 | Evaluation within deployment: "811df760" 331 | Allocation "6311f4ea" created: node "fab91380", group "folding-at-home" 332 | Evaluation status changed: "pending" -> "complete" 333 | ==> Evaluation "c01bbaa9" finished with status "complete" 334 | ``` 335 | 336 | Now check the status of the cluster again: 337 | 338 | ```console 339 | nomad status 340 | ``` 341 | 342 | Output will look *something* like this: 343 | 344 | ```plaintext 345 | ID Type Priority Status Submit Date 346 | folding-at-home service 50 running 2020-07-11T19:36:47-04:00 347 | ``` 348 | 349 | To check the status of the `folding-at-home` job, run the folliwng command: 350 | 351 | ```console 352 | nomad status folding-at-home 353 | ``` 354 | 355 | Output will look *something* like this: 356 | 357 | ```plaintext 358 | ID = folding-at-home 359 | Name = folding-at-home 360 | Submit Date 
= 2020-07-11T19:36:47-04:00 361 | Type = service 362 | Priority = 50 363 | Datacenters = dc1 364 | Namespace = default 365 | Status = running 366 | Periodic = false 367 | Parameterized = false 368 | 369 | Summary 370 | Task Group Queued Starting Running Failed Complete Lost 371 | folding-at-home 0 0 1 0 0 0 372 | 373 | Latest Deployment 374 | ID = 811df760 375 | Status = successful 376 | Description = Deployment completed successfully 377 | 378 | Deployed 379 | Task Group Desired Placed Healthy Unhealthy Progress Deadline 380 | folding-at-home 1 1 1 0 2020-07-11T23:47:06Z 381 | 382 | Allocations 383 | ID Node ID Task Group Version Desired Status Created Modified 384 | 6311f4ea fab91380 folding-at-home 0 run running 3m4s ago 2m45s ago 385 | ``` 386 | 387 | ☝🏽We can see in the output from the last command a `Allocations` section with an ID (in this case `6311f4ea`). We can use this allocation ID to interact with the container. 388 | 389 | To tail/follow the logs (STDOUT, by default) of the container: 390 | 391 | ```console 392 | nomad alloc logs -f 6311f4ea 393 | ``` 394 | 395 | > ℹ️ **Note** 396 | > 397 | > Press [CTRL+C](https://en.wikipedia.org/wiki/Control-C) to quit tailing/following the logs. 398 | 399 | 400 | ### Stop Container 401 | 402 | To stop the container, we can stop the `folding-at-home` job: 403 | 404 | ```console 405 | nomad job stop folding-at-home 406 | ``` 407 | 408 | Output will look *something* like this: 409 | 410 | ```plaintext 411 | ==> Monitoring evaluation "b6144971" 412 | Evaluation triggered by job "folding-at-home" 413 | Evaluation within deployment: "811df760" 414 | Evaluation status changed: "pending" -> "complete" 415 | ==> Evaluation "b6144971" finished with status "complete" 416 | ``` 417 | 418 | ## Doing More with Nomad 419 | 420 | Ready to start running other containers in the cluster, or interested in what other things Nomad can do? 
Check out these awesome resources: 421 | 422 | ### HashiCorp Learn 423 | 424 | The official [HashiCorp Learn](https://learn.hashicorp.com/) platform provides tutorials for: 425 | 426 | * [Getting Started with Jobs](https://learn.hashicorp.com/nomad/getting-started/jobs) 427 | * [ACL System Fundamentals](https://learn.hashicorp.com/nomad/acls/fundamentals) 428 | * [Advanced Scheduling](https://learn.hashicorp.com/nomad/advanced-scheduling/advanced-scheduling) 429 | * [Stateful Workloads](https://learn.hashicorp.com/nomad/stateful-workloads/stateful-workloads) 430 | * [Task Dependencies](https://learn.hashicorp.com/nomad/task-deps/interjob) 431 | * And much [more](https://learn.hashicorp.com/nomad)! 432 | 433 | ### Documentation 434 | 435 | * [Schedulers](https://www.nomadproject.io/docs/schedulers) 436 | * [Job Specification](https://www.nomadproject.io/docs/job-specification) 437 | * [Security Model](https://www.nomadproject.io/docs/internals/security) 438 | * And much [more](https://www.nomadproject.io/docs)! 439 | 440 | ### Example Job Files 441 | 442 | * [Charlie Voiselle's Collection of Nomad Job Examples](https://github.com/angrycub/nomad_example_jobs) 443 | * [Guy Barros' Collection of Nomad Jobs](https://github.com/GuyBarros/nomad_jobs) 444 | 445 | ## Conclusion 446 | 447 | 👏🏽 You have now deployed a Nomad cluster, bootstrapped the ACL system with a management token, and submitted an example job using a Docker container, yay!
448 | 449 | ### Destroy Infrastructure 450 | 451 | Once you are done [playing around with Nomad](https://learn.hashicorp.com/nomad), and wish to destroy the infrastructure to save costs, run the following command: 452 | 453 | ```console 454 | terraform destroy -auto-approve -var="project=$GOOGLE_PROJECT" -var="credentials=$GOOGLE_APPLICATION_CREDENTIALS" 455 | ``` 456 | -------------------------------------------------------------------------------- /consul_tls_ca.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "consul-ca" { 2 | algorithm = "RSA" 3 | rsa_bits = "2048" 4 | } 5 | 6 | resource "tls_self_signed_cert" "consul-ca" { 7 | is_ca_certificate = true 8 | validity_period_hours = var.tls_validity_period_hours 9 | 10 | private_key_pem = tls_private_key.consul-ca.private_key_pem 11 | 12 | subject { 13 | common_name = "consul-ca.local" 14 | organization = var.tls_organization 15 | } 16 | 17 | allowed_uses = [ 18 | "cert_signing", 19 | "digital_signature", 20 | "key_encipherment", 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /consul_tls_cli.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "consul-cli" { 2 | algorithm = "RSA" 3 | rsa_bits = "2048" 4 | } 5 | 6 | resource "tls_cert_request" "consul-cli" { 7 | private_key_pem = tls_private_key.consul-cli.private_key_pem 8 | 9 | ip_addresses = [ 10 | module.load_balancer.external_ip, 11 | "127.0.0.1", 12 | ] 13 | 14 | dns_names = [ 15 | "localhost", 16 | "cli.dc1.consul", 17 | ] 18 | 19 | subject { 20 | common_name = "client.global.consul" 21 | organization = var.tls_organization 22 | } 23 | } 24 | 25 | resource "tls_locally_signed_cert" "consul-cli" { 26 | cert_request_pem = tls_cert_request.consul-cli.cert_request_pem 27 | 28 | ca_private_key_pem = tls_private_key.consul-ca.private_key_pem 29 | ca_cert_pem = 
tls_self_signed_cert.consul-ca.cert_pem 30 | 31 | validity_period_hours = var.tls_validity_period_hours 32 | 33 | allowed_uses = [ 34 | "digital_signature", 35 | "key_encipherment", 36 | ] 37 | } -------------------------------------------------------------------------------- /consul_tls_client.tf: -------------------------------------------------------------------------------- 1 | # TODO(kent): delete once auto_encrypt is setup/verified 2 | 3 | resource "tls_private_key" "consul-client" { 4 | algorithm = "RSA" 5 | rsa_bits = "2048" 6 | } 7 | 8 | resource "tls_cert_request" "consul-client" { 9 | private_key_pem = tls_private_key.consul-client.private_key_pem 10 | 11 | ip_addresses = [ 12 | "127.0.0.1", 13 | ] 14 | 15 | dns_names = [ 16 | "localhost", 17 | "client.dc1.consul", 18 | ] 19 | 20 | subject { 21 | common_name = "client.dc1.consul" 22 | organization = var.tls_organization 23 | } 24 | } 25 | 26 | resource "tls_locally_signed_cert" "consul-client" { 27 | cert_request_pem = tls_cert_request.consul-client.cert_request_pem 28 | 29 | ca_private_key_pem = tls_private_key.consul-ca.private_key_pem 30 | ca_cert_pem = tls_self_signed_cert.consul-ca.cert_pem 31 | 32 | validity_period_hours = var.tls_validity_period_hours 33 | 34 | allowed_uses = [ 35 | "server_auth", 36 | "client_auth", 37 | ] 38 | } -------------------------------------------------------------------------------- /consul_tls_server.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "consul-server" { 2 | algorithm = "RSA" 3 | rsa_bits = "2048" 4 | } 5 | 6 | resource "tls_cert_request" "consul-server" { 7 | private_key_pem = tls_private_key.consul-server.private_key_pem 8 | 9 | ip_addresses = [ 10 | module.load_balancer.external_ip, 11 | "127.0.0.1", 12 | ] 13 | 14 | dns_names = [ 15 | "localhost", 16 | "server.dc1.consul", 17 | ] 18 | 19 | subject { 20 | common_name = "server.dc1.consul" 21 | organization = var.tls_organization 22 | } 23 
| } 24 | 25 | resource "tls_locally_signed_cert" "consul-server" { 26 | cert_request_pem = tls_cert_request.consul-server.cert_request_pem 27 | 28 | ca_private_key_pem = tls_private_key.consul-ca.private_key_pem 29 | ca_cert_pem = tls_self_signed_cert.consul-ca.cert_pem 30 | 31 | validity_period_hours = var.tls_validity_period_hours 32 | 33 | allowed_uses = [ 34 | "server_auth", 35 | "client_auth", 36 | ] 37 | } -------------------------------------------------------------------------------- /container_registry.tf: -------------------------------------------------------------------------------- 1 | // Not actually used because it breaks things: https://github.com/hashicorp/nomad/issues/9991 2 | // 3 | // However, using a cloud storage bucket with an artifact stanza works to share/run private containers. 4 | resource "google_container_registry" "nomad" { 5 | location = "US" 6 | } -------------------------------------------------------------------------------- /dashboards/grafana.tf: -------------------------------------------------------------------------------- 1 | provider "grafana" { 2 | // GRAFANA_AUTH 3 | // GRAFANA_URL 4 | } 5 | 6 | resource "grafana_data_source" "prometheus" { 7 | type = "prometheus" 8 | name = "Prometheus" 9 | url = "http://127.0.0.1:9090" 10 | is_default = true 11 | } 12 | 13 | resource "grafana_data_source" "loki" { 14 | type = "loki" 15 | name = "Loki" 16 | url = "http://127.0.0.1:3100" 17 | } 18 | 19 | resource "grafana_dashboard" "nomad_clients" { 20 | config_json = file("nomad-clients.json") 21 | } -------------------------------------------------------------------------------- /dashboards/nomad-clients.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13
| ] 14 | }, 15 | "editable": true, 16 | "gnetId": null, 17 | "graphTooltip": 0, 18 | "id": 1, 19 | "links": [], 20 | "panels": [ 21 | { 22 | "datasource": "Loki", 23 | "fieldConfig": { 24 | "defaults": { 25 | "custom": {} 26 | }, 27 | "overrides": [] 28 | }, 29 | "gridPos": { 30 | "h": 4, 31 | "w": 24, 32 | "x": 0, 33 | "y": 0 34 | }, 35 | "id": 18, 36 | "options": { 37 | "showLabels": false, 38 | "showTime": false, 39 | "sortOrder": "Descending", 40 | "wrapLogMessage": false 41 | }, 42 | "pluginVersion": "7.3.5", 43 | "targets": [ 44 | { 45 | "expr": "{filename!~\"/nomad/alloc/.+/alloc/logs/(loki.+|traefik.+|prometheus.+|connect-proxy.+|promtail.+)\", job=\"nomad\"}", 46 | "legendFormat": "{{hostname}}", 47 | "refId": "A" 48 | }, 49 | { 50 | "expr": "", 51 | "legendFormat": "", 52 | "refId": "B" 53 | } 54 | ], 55 | "timeFrom": null, 56 | "timeShift": null, 57 | "title": "Allocation Logs", 58 | "type": "logs" 59 | }, 60 | { 61 | "aliasColors": {}, 62 | "bars": false, 63 | "dashLength": 10, 64 | "dashes": false, 65 | "datasource": null, 66 | "fieldConfig": { 67 | "defaults": { 68 | "custom": { 69 | "align": null, 70 | "filterable": false 71 | }, 72 | "mappings": [], 73 | "thresholds": { 74 | "mode": "absolute", 75 | "steps": [ 76 | { 77 | "color": "green", 78 | "value": null 79 | }, 80 | { 81 | "color": "red", 82 | "value": 80 83 | } 84 | ] 85 | } 86 | }, 87 | "overrides": [] 88 | }, 89 | "fill": 1, 90 | "fillGradient": 0, 91 | "gridPos": { 92 | "h": 7, 93 | "w": 24, 94 | "x": 0, 95 | "y": 4 96 | }, 97 | "hiddenSeries": false, 98 | "id": 14, 99 | "legend": { 100 | "alignAsTable": true, 101 | "avg": true, 102 | "current": true, 103 | "max": true, 104 | "min": true, 105 | "rightSide": true, 106 | "show": true, 107 | "total": false, 108 | "values": true 109 | }, 110 | "lines": true, 111 | "linewidth": 1, 112 | "nullPointMode": "null", 113 | "options": { 114 | "alertThreshold": true 115 | }, 116 | "percentage": false, 117 | "pluginVersion": "7.3.5", 118 | "pointradius": 
2, 119 | "points": false, 120 | "renderer": "flot", 121 | "seriesOverrides": [], 122 | "spaceLength": 10, 123 | "stack": false, 124 | "steppedLine": false, 125 | "targets": [ 126 | { 127 | "expr": "nomad_client_host_cpu_total", 128 | "interval": "", 129 | "legendFormat": "{{node_id}} ({{cpu}})", 130 | "refId": "A" 131 | } 132 | ], 133 | "thresholds": [], 134 | "timeFrom": null, 135 | "timeRegions": [], 136 | "timeShift": null, 137 | "title": "Host CPU Usage", 138 | "tooltip": { 139 | "shared": true, 140 | "sort": 0, 141 | "value_type": "individual" 142 | }, 143 | "type": "graph", 144 | "xaxis": { 145 | "buckets": null, 146 | "mode": "time", 147 | "name": null, 148 | "show": true, 149 | "values": [] 150 | }, 151 | "yaxes": [ 152 | { 153 | "format": "percent", 154 | "label": null, 155 | "logBase": 1, 156 | "max": null, 157 | "min": null, 158 | "show": true 159 | }, 160 | { 161 | "format": "short", 162 | "label": null, 163 | "logBase": 1, 164 | "max": null, 165 | "min": null, 166 | "show": true 167 | } 168 | ], 169 | "yaxis": { 170 | "align": false, 171 | "alignLevel": null 172 | } 173 | }, 174 | { 175 | "aliasColors": {}, 176 | "bars": false, 177 | "dashLength": 10, 178 | "dashes": false, 179 | "datasource": null, 180 | "fieldConfig": { 181 | "defaults": { 182 | "custom": {} 183 | }, 184 | "overrides": [] 185 | }, 186 | "fill": 1, 187 | "fillGradient": 0, 188 | "gridPos": { 189 | "h": 7, 190 | "w": 24, 191 | "x": 0, 192 | "y": 11 193 | }, 194 | "hiddenSeries": false, 195 | "id": 16, 196 | "legend": { 197 | "alignAsTable": true, 198 | "avg": true, 199 | "current": true, 200 | "max": true, 201 | "min": true, 202 | "rightSide": true, 203 | "show": true, 204 | "total": false, 205 | "values": true 206 | }, 207 | "lines": true, 208 | "linewidth": 1, 209 | "nullPointMode": "null", 210 | "options": { 211 | "alertThreshold": true 212 | }, 213 | "percentage": false, 214 | "pluginVersion": "7.3.5", 215 | "pointradius": 2, 216 | "points": false, 217 | "renderer": "flot", 218 | 
"seriesOverrides": [], 219 | "spaceLength": 10, 220 | "stack": false, 221 | "steppedLine": false, 222 | "targets": [ 223 | { 224 | "expr": "nomad_client_host_cpu_idle", 225 | "interval": "", 226 | "legendFormat": "{{node_id}} ({{cpu}})", 227 | "refId": "A" 228 | } 229 | ], 230 | "thresholds": [], 231 | "timeFrom": null, 232 | "timeRegions": [], 233 | "timeShift": null, 234 | "title": "Host CPU Idle", 235 | "tooltip": { 236 | "shared": true, 237 | "sort": 0, 238 | "value_type": "individual" 239 | }, 240 | "type": "graph", 241 | "xaxis": { 242 | "buckets": null, 243 | "mode": "time", 244 | "name": null, 245 | "show": true, 246 | "values": [] 247 | }, 248 | "yaxes": [ 249 | { 250 | "format": "percent", 251 | "label": null, 252 | "logBase": 1, 253 | "max": null, 254 | "min": null, 255 | "show": true 256 | }, 257 | { 258 | "format": "short", 259 | "label": null, 260 | "logBase": 1, 261 | "max": null, 262 | "min": null, 263 | "show": true 264 | } 265 | ], 266 | "yaxis": { 267 | "align": false, 268 | "alignLevel": null 269 | } 270 | }, 271 | { 272 | "aliasColors": {}, 273 | "bars": false, 274 | "dashLength": 10, 275 | "dashes": false, 276 | "datasource": null, 277 | "fieldConfig": { 278 | "defaults": { 279 | "custom": {}, 280 | "mappings": [], 281 | "thresholds": { 282 | "mode": "absolute", 283 | "steps": [ 284 | { 285 | "color": "green", 286 | "value": null 287 | }, 288 | { 289 | "color": "red", 290 | "value": 80 291 | } 292 | ] 293 | } 294 | }, 295 | "overrides": [] 296 | }, 297 | "fill": 1, 298 | "fillGradient": 0, 299 | "gridPos": { 300 | "h": 7, 301 | "w": 24, 302 | "x": 0, 303 | "y": 18 304 | }, 305 | "hiddenSeries": false, 306 | "id": 12, 307 | "legend": { 308 | "alignAsTable": true, 309 | "avg": true, 310 | "current": true, 311 | "max": true, 312 | "min": true, 313 | "rightSide": true, 314 | "show": true, 315 | "total": false, 316 | "values": true 317 | }, 318 | "lines": true, 319 | "linewidth": 1, 320 | "nullPointMode": "null", 321 | "options": { 322 | 
"alertThreshold": true 323 | }, 324 | "percentage": false, 325 | "pluginVersion": "7.3.5", 326 | "pointradius": 2, 327 | "points": false, 328 | "renderer": "flot", 329 | "seriesOverrides": [], 330 | "spaceLength": 10, 331 | "stack": false, 332 | "steppedLine": false, 333 | "targets": [ 334 | { 335 | "expr": "nomad_client_host_memory_used", 336 | "interval": "", 337 | "legendFormat": "{{node_id}}", 338 | "refId": "A" 339 | } 340 | ], 341 | "thresholds": [], 342 | "timeFrom": null, 343 | "timeRegions": [], 344 | "timeShift": null, 345 | "title": "Host Memory Used", 346 | "tooltip": { 347 | "shared": true, 348 | "sort": 0, 349 | "value_type": "individual" 350 | }, 351 | "type": "graph", 352 | "xaxis": { 353 | "buckets": null, 354 | "mode": "time", 355 | "name": null, 356 | "show": true, 357 | "values": [] 358 | }, 359 | "yaxes": [ 360 | { 361 | "format": "decbytes", 362 | "label": null, 363 | "logBase": 1, 364 | "max": null, 365 | "min": null, 366 | "show": true 367 | }, 368 | { 369 | "format": "short", 370 | "label": null, 371 | "logBase": 1, 372 | "max": null, 373 | "min": null, 374 | "show": true 375 | } 376 | ], 377 | "yaxis": { 378 | "align": false, 379 | "alignLevel": null 380 | } 381 | }, 382 | { 383 | "aliasColors": {}, 384 | "bars": false, 385 | "dashLength": 10, 386 | "dashes": false, 387 | "datasource": null, 388 | "fieldConfig": { 389 | "defaults": { 390 | "custom": {} 391 | }, 392 | "overrides": [] 393 | }, 394 | "fill": 1, 395 | "fillGradient": 0, 396 | "gridPos": { 397 | "h": 5, 398 | "w": 24, 399 | "x": 0, 400 | "y": 25 401 | }, 402 | "hiddenSeries": false, 403 | "id": 8, 404 | "legend": { 405 | "alignAsTable": true, 406 | "avg": true, 407 | "current": true, 408 | "max": false, 409 | "min": false, 410 | "rightSide": true, 411 | "show": true, 412 | "total": false, 413 | "values": true 414 | }, 415 | "lines": true, 416 | "linewidth": 1, 417 | "nullPointMode": "null", 418 | "options": { 419 | "alertThreshold": true 420 | }, 421 | "percentage": false, 422 
| "pluginVersion": "7.3.5", 423 | "pointradius": 2, 424 | "points": false, 425 | "renderer": "flot", 426 | "seriesOverrides": [], 427 | "spaceLength": 10, 428 | "stack": false, 429 | "steppedLine": false, 430 | "targets": [ 431 | { 432 | "expr": "nomad_client_allocated_disk", 433 | "interval": "", 434 | "legendFormat": "{{ node_id }}", 435 | "refId": "A" 436 | } 437 | ], 438 | "thresholds": [], 439 | "timeFrom": null, 440 | "timeRegions": [], 441 | "timeShift": null, 442 | "title": "Allocated Disk", 443 | "tooltip": { 444 | "shared": true, 445 | "sort": 0, 446 | "value_type": "individual" 447 | }, 448 | "type": "graph", 449 | "xaxis": { 450 | "buckets": null, 451 | "mode": "time", 452 | "name": null, 453 | "show": true, 454 | "values": [] 455 | }, 456 | "yaxes": [ 457 | { 458 | "format": "decmbytes", 459 | "label": null, 460 | "logBase": 1, 461 | "max": null, 462 | "min": null, 463 | "show": true 464 | }, 465 | { 466 | "format": "short", 467 | "label": null, 468 | "logBase": 1, 469 | "max": null, 470 | "min": null, 471 | "show": true 472 | } 473 | ], 474 | "yaxis": { 475 | "align": false, 476 | "alignLevel": null 477 | } 478 | }, 479 | { 480 | "aliasColors": {}, 481 | "bars": false, 482 | "dashLength": 10, 483 | "dashes": false, 484 | "datasource": null, 485 | "fieldConfig": { 486 | "defaults": { 487 | "custom": {} 488 | }, 489 | "overrides": [] 490 | }, 491 | "fill": 1, 492 | "fillGradient": 0, 493 | "gridPos": { 494 | "h": 5, 495 | "w": 24, 496 | "x": 0, 497 | "y": 30 498 | }, 499 | "hiddenSeries": false, 500 | "id": 6, 501 | "legend": { 502 | "alignAsTable": true, 503 | "avg": true, 504 | "current": true, 505 | "max": false, 506 | "min": false, 507 | "rightSide": true, 508 | "show": true, 509 | "total": false, 510 | "values": true 511 | }, 512 | "lines": true, 513 | "linewidth": 1, 514 | "nullPointMode": "null", 515 | "options": { 516 | "alertThreshold": true 517 | }, 518 | "percentage": false, 519 | "pluginVersion": "7.3.5", 520 | "pointradius": 2, 521 | 
"points": false, 522 | "renderer": "flot", 523 | "seriesOverrides": [], 524 | "spaceLength": 10, 525 | "stack": false, 526 | "steppedLine": false, 527 | "targets": [ 528 | { 529 | "expr": "nomad_client_allocations_running", 530 | "interval": "", 531 | "legendFormat": "{{ node_id }}", 532 | "refId": "A" 533 | } 534 | ], 535 | "thresholds": [], 536 | "timeFrom": null, 537 | "timeRegions": [], 538 | "timeShift": null, 539 | "title": "Allocations Running", 540 | "tooltip": { 541 | "shared": true, 542 | "sort": 0, 543 | "value_type": "individual" 544 | }, 545 | "type": "graph", 546 | "xaxis": { 547 | "buckets": null, 548 | "mode": "time", 549 | "name": null, 550 | "show": true, 551 | "values": [] 552 | }, 553 | "yaxes": [ 554 | { 555 | "format": "short", 556 | "label": null, 557 | "logBase": 1, 558 | "max": null, 559 | "min": null, 560 | "show": true 561 | }, 562 | { 563 | "format": "short", 564 | "label": null, 565 | "logBase": 1, 566 | "max": null, 567 | "min": null, 568 | "show": true 569 | } 570 | ], 571 | "yaxis": { 572 | "align": false, 573 | "alignLevel": null 574 | } 575 | }, 576 | { 577 | "aliasColors": {}, 578 | "bars": false, 579 | "dashLength": 10, 580 | "dashes": false, 581 | "datasource": null, 582 | "fieldConfig": { 583 | "defaults": { 584 | "custom": {} 585 | }, 586 | "overrides": [] 587 | }, 588 | "fill": 1, 589 | "fillGradient": 0, 590 | "gridPos": { 591 | "h": 5, 592 | "w": 24, 593 | "x": 0, 594 | "y": 35 595 | }, 596 | "hiddenSeries": false, 597 | "id": 4, 598 | "legend": { 599 | "alignAsTable": true, 600 | "avg": true, 601 | "current": true, 602 | "max": false, 603 | "min": false, 604 | "rightSide": true, 605 | "show": true, 606 | "total": false, 607 | "values": true 608 | }, 609 | "lines": true, 610 | "linewidth": 1, 611 | "nullPointMode": "null", 612 | "options": { 613 | "alertThreshold": true 614 | }, 615 | "percentage": false, 616 | "pluginVersion": "7.3.5", 617 | "pointradius": 2, 618 | "points": false, 619 | "renderer": "flot", 620 | 
"seriesOverrides": [], 621 | "spaceLength": 10, 622 | "stack": false, 623 | "steppedLine": false, 624 | "targets": [ 625 | { 626 | "expr": "nomad_client_allocated_memory", 627 | "interval": "", 628 | "legendFormat": "{{ node_id }}", 629 | "refId": "A" 630 | } 631 | ], 632 | "thresholds": [], 633 | "timeFrom": null, 634 | "timeRegions": [], 635 | "timeShift": null, 636 | "title": "Client Allocated Memory", 637 | "tooltip": { 638 | "shared": true, 639 | "sort": 0, 640 | "value_type": "individual" 641 | }, 642 | "type": "graph", 643 | "xaxis": { 644 | "buckets": null, 645 | "mode": "time", 646 | "name": null, 647 | "show": true, 648 | "values": [] 649 | }, 650 | "yaxes": [ 651 | { 652 | "format": "decmbytes", 653 | "label": "", 654 | "logBase": 1, 655 | "max": null, 656 | "min": null, 657 | "show": true 658 | }, 659 | { 660 | "format": "short", 661 | "label": "", 662 | "logBase": 1, 663 | "max": null, 664 | "min": null, 665 | "show": true 666 | } 667 | ], 668 | "yaxis": { 669 | "align": false, 670 | "alignLevel": null 671 | } 672 | }, 673 | { 674 | "aliasColors": {}, 675 | "bars": false, 676 | "dashLength": 10, 677 | "dashes": false, 678 | "datasource": "Prometheus", 679 | "fieldConfig": { 680 | "defaults": { 681 | "custom": {}, 682 | "mappings": [], 683 | "thresholds": { 684 | "mode": "absolute", 685 | "steps": [ 686 | { 687 | "color": "green", 688 | "value": null 689 | }, 690 | { 691 | "color": "red", 692 | "value": 80 693 | } 694 | ] 695 | } 696 | }, 697 | "overrides": [] 698 | }, 699 | "fill": 1, 700 | "fillGradient": 0, 701 | "gridPos": { 702 | "h": 5, 703 | "w": 24, 704 | "x": 0, 705 | "y": 40 706 | }, 707 | "hiddenSeries": false, 708 | "id": 2, 709 | "legend": { 710 | "alignAsTable": true, 711 | "avg": true, 712 | "current": true, 713 | "max": false, 714 | "min": false, 715 | "rightSide": true, 716 | "show": true, 717 | "sideWidth": null, 718 | "total": false, 719 | "values": true 720 | }, 721 | "lines": true, 722 | "linewidth": 1, 723 | "nullPointMode": 
"null", 724 | "options": { 725 | "alertThreshold": true 726 | }, 727 | "percentage": false, 728 | "pluginVersion": "7.3.5", 729 | "pointradius": 2, 730 | "points": false, 731 | "renderer": "flot", 732 | "seriesOverrides": [], 733 | "spaceLength": 10, 734 | "stack": false, 735 | "steppedLine": false, 736 | "targets": [ 737 | { 738 | "expr": "nomad_client_allocated_cpu", 739 | "interval": "", 740 | "legendFormat": "{{ node_id }}", 741 | "refId": "A" 742 | } 743 | ], 744 | "thresholds": [], 745 | "timeFrom": null, 746 | "timeRegions": [], 747 | "timeShift": null, 748 | "title": "Client Allocated CPU", 749 | "tooltip": { 750 | "shared": true, 751 | "sort": 0, 752 | "value_type": "individual" 753 | }, 754 | "type": "graph", 755 | "xaxis": { 756 | "buckets": null, 757 | "mode": "time", 758 | "name": null, 759 | "show": true, 760 | "values": [] 761 | }, 762 | "yaxes": [ 763 | { 764 | "format": "MHz", 765 | "label": "MHz", 766 | "logBase": 1, 767 | "max": null, 768 | "min": "0", 769 | "show": true 770 | }, 771 | { 772 | "format": "short", 773 | "label": null, 774 | "logBase": 1, 775 | "max": null, 776 | "min": null, 777 | "show": true 778 | } 779 | ], 780 | "yaxis": { 781 | "align": false, 782 | "alignLevel": null 783 | } 784 | } 785 | ], 786 | "refresh": "5s", 787 | "schemaVersion": 26, 788 | "style": "dark", 789 | "tags": [], 790 | "templating": { 791 | "list": [] 792 | }, 793 | "time": { 794 | "from": "now-1h", 795 | "to": "now" 796 | }, 797 | "timepicker": {}, 798 | "timezone": "", 799 | "title": "Nomad Clients", 800 | "uid": "IM0JufHnk", 801 | "version": 2 802 | } -------------------------------------------------------------------------------- /dashboards/providers.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | grafana = { 4 | source = "grafana/grafana" 5 | version = "1.13.4" 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- 
/diagrams/readme.d2: -------------------------------------------------------------------------------- 1 | admin -> deploy -> cloud 2 | 3 | cloud.org.project.vpc.lb -> cloud.org.project.vpc.subnet.cluster 4 | 5 | admin: "Administrator" { 6 | style: { 7 | fill: "#ffffff" 8 | stroke: grey 9 | border-radius: 4 10 | } 11 | terraform: "" { 12 | shape: image 13 | icon: https://www.svgrepo.com/show/408174/woman-technologist-medium-dark-skin-tone.svg 14 | } 15 | } 16 | 17 | deploy: "Deploy" { 18 | style: { 19 | fill: "#ffffff" 20 | stroke: grey 21 | border-radius: 4 22 | } 23 | terraform: "" { 24 | shape: image 25 | icon: https://www.svgrepo.com/show/376353/terraform.svg 26 | } 27 | } 28 | 29 | cloud: "Google Cloud Platform" { 30 | direction: down 31 | icon: https://www.svgrepo.com/show/353805/google-cloud.svg 32 | icon.near: top-right 33 | style: { 34 | fill: "#ffffff" 35 | stroke: grey 36 | border-radius: 4 37 | } 38 | org: "Organization" { 39 | project: "Project" { 40 | vpc: "VPC" { 41 | lb: "Load Balancer" 42 | subnet: "Subnet" { 43 | cluster: "Nomad Cluster" { 44 | label.near: top-center 45 | icon: https://www.svgrepo.com/show/448241/nomad.svg 46 | icon.near: top-right 47 | 48 | servers: "Servers" { 49 | a: "Server A" { 50 | agent: "" { 51 | shape: image 52 | width: 100 53 | icon: https://www.svgrepo.com/show/448241/nomad.svg 54 | } 55 | icon: https://www.svgrepo.com/show/448216/consul.svg 56 | icon.near: bottom-right 57 | } 58 | b: "Server B" { 59 | agent: "" { 60 | shape: image 61 | width: 100 62 | icon: https://www.svgrepo.com/show/448241/nomad.svg 63 | } 64 | icon: https://www.svgrepo.com/show/448216/consul.svg 65 | icon.near: bottom-right 66 | } 67 | c: "Server C" { 68 | agent: "" { 69 | shape: image 70 | width: 100 71 | icon: https://www.svgrepo.com/show/448241/nomad.svg 72 | } 73 | icon: https://www.svgrepo.com/show/448216/consul.svg 74 | icon.near: bottom-right 75 | } 76 | direction: right 77 | } 78 | clients: "Clients" { 79 | a: "Client A" { 80 | agent: "" { 
81 | shape: image 82 | width: 100 83 | icon: https://www.svgrepo.com/show/448241/nomad.svg 84 | } 85 | icon: https://www.svgrepo.com/show/448216/consul.svg 86 | icon.near: bottom-right 87 | } 88 | b: "Client B" { 89 | agent: "" { 90 | shape: image 91 | width: 100 92 | icon: https://www.svgrepo.com/show/448241/nomad.svg 93 | } 94 | icon: https://www.svgrepo.com/show/448216/consul.svg 95 | icon.near: bottom-right 96 | } 97 | c: "Client C" { 98 | agent: "" { 99 | shape: image 100 | width: 100 101 | icon: https://www.svgrepo.com/show/448241/nomad.svg 102 | } 103 | icon: https://www.svgrepo.com/show/448216/consul.svg 104 | icon.near: bottom-right 105 | } 106 | d: "Client D" { 107 | agent: "" { 108 | shape: image 109 | width: 100 110 | icon: https://www.svgrepo.com/show/448241/nomad.svg 111 | } 112 | icon: https://www.svgrepo.com/show/448216/consul.svg 113 | icon.near: bottom-right 114 | } 115 | e: "Client E" { 116 | agent: "" { 117 | shape: image 118 | width: 100 119 | icon: https://www.svgrepo.com/show/448241/nomad.svg 120 | } 121 | icon: https://www.svgrepo.com/show/448216/consul.svg 122 | icon.near: bottom-right 123 | } 124 | f: "Client F" { 125 | agent: "" { 126 | shape: image 127 | width: 100 128 | icon: https://www.svgrepo.com/show/448241/nomad.svg 129 | } 130 | icon: https://www.svgrepo.com/show/448216/consul.svg 131 | icon.near: bottom-right 132 | } 133 | } 134 | 135 | servers <-> clients 136 | } 137 | } 138 | } 139 | } 140 | } 141 | } -------------------------------------------------------------------------------- /dns.tf: -------------------------------------------------------------------------------- 1 | resource "google_dns_record_set" "public" { 2 | count = var.dns_enabled ?
1 : 0 3 | name = format("%s.%s.", var.dns_record_set_name_prefix, var.dns_managed_zone_dns_name) 4 | type = "A" 5 | ttl = 300 6 | 7 | managed_zone = google_dns_managed_zone.nomad.0.name 8 | 9 | rrdatas = [module.load_balancer.external_ip] 10 | } 11 | 12 | resource "google_dns_managed_zone" "nomad" { 13 | count = var.dns_enabled ? 1 : 0 14 | name = "nomad" 15 | dns_name = format("%s.", var.dns_managed_zone_dns_name) 16 | } 17 | 18 | resource "google_dns_record_set" "grafana_public" { 19 | count = (var.dns_enabled && var.grafana_dns_managed_zone_dns_name != "") ? 1 : 0 20 | name = format("%s.%s.", var.grafana_dns_record_set_name_prefix, var.grafana_dns_managed_zone_dns_name) 21 | type = "A" 22 | ttl = 300 23 | 24 | managed_zone = google_dns_managed_zone.grafana.0.name 25 | 26 | rrdatas = [module.grafana_load_balancer.external_ip] 27 | } 28 | 29 | resource "google_dns_managed_zone" "grafana" { 30 | count = (var.dns_enabled && var.grafana_dns_managed_zone_dns_name != "") ? 1 : 0 31 | name = "grafana" 32 | dns_name = format("%s.", var.grafana_dns_managed_zone_dns_name) 33 | } -------------------------------------------------------------------------------- /example/jobs/folding-at-home.hcl: -------------------------------------------------------------------------------- 1 | job "folding-at-home" { 2 | datacenters = ["dc1"] 3 | group "folding-at-home" { 4 | task "folding-at-home" { 5 | driver = "docker" 6 | config { 7 | image = "kentgruber/fah-client:latest" 8 | } 9 | } 10 | } 11 | } -------------------------------------------------------------------------------- /example/main.tf: -------------------------------------------------------------------------------- 1 | variable "project" { 2 | description = "The GCP project name to deploy the cluster to." 3 | } 4 | 5 | variable "credentials" { 6 | description = "The GCP credentials file path to use, preferably a Terraform Service Account." 
7 | } 8 | 9 | module "nomad" { 10 | source = "picatz/nomad/google" 11 | version = "2.5.0" 12 | project = var.project 13 | credentials = var.credentials 14 | bastion_enabled = false 15 | server_instances = 1 16 | client_instances = 1 17 | } -------------------------------------------------------------------------------- /example/outputs.tf: -------------------------------------------------------------------------------- 1 | output "ca_cert" { 2 | sensitive = true 3 | description = "The TLS CA certificate used for CLI authentication." 4 | value = module.nomad.ca_cert 5 | } 6 | 7 | output "cli_cert" { 8 | sensitive = true 9 | description = "The TLS certificate used for CLI authentication." 10 | value = module.nomad.cli_cert 11 | } 12 | 13 | output "cli_key" { 14 | sensitive = true 15 | description = "The TLS private key used for CLI authentication." 16 | value = module.nomad.cli_key 17 | } 18 | 19 | output "bastion_ssh_public_key" { 20 | sensitive = true 21 | description = "The SSH bastion public key." 22 | value = module.nomad.bastion_ssh_public_key 23 | } 24 | 25 | output "bastion_ssh_private_key" { 26 | sensitive = true 27 | description = "The SSH bastion private key." 28 | value = module.nomad.bastion_ssh_private_key 29 | } 30 | 31 | output "bastion_public_ip" { 32 | description = "The SSH bastion public IP." 33 | value = module.nomad.bastion_public_ip 34 | } 35 | 36 | output "nomad_server_ip" { 37 | description = "The Nomad server private IP." 
38 | value = module.nomad.nomad_server_ip 39 | } 40 | 41 | output "load_balancer_ip" { 42 | description = "The external ip address of the load balancer" 43 | value = module.nomad.load_balancer_ip 44 | } -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/picatz/terraform-google-nomad 2 | 3 | go 1.14 4 | 5 | require golang.org/x/crypto v0.1.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 2 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 3 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 4 | golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= 5 | golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= 6 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 7 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 8 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 9 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 10 | golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= 11 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 12 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 13 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 14 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 15 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 16 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 17 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 18 | golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= 19 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 20 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 21 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 22 | golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw= 23 | golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 24 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 25 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 26 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 27 | golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 28 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 29 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 30 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 31 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 32 | -------------------------------------------------------------------------------- /gossip_keys.tf: 
-------------------------------------------------------------------------------- 1 | resource "random_id" "nomad-gossip-key" { 2 | byte_length = 32 3 | } 4 | 5 | resource "random_id" "consul-gossip-key" { 6 | byte_length = 32 7 | } 8 | -------------------------------------------------------------------------------- /jobs/count-dashboard.hcl: -------------------------------------------------------------------------------- 1 | job "countdash" { 2 | datacenters = ["dc1"] 3 | group "api" { 4 | network { 5 | mode = "bridge" 6 | } 7 | 8 | service { 9 | name = "count-api" 10 | port = "9001" 11 | 12 | connect { 13 | sidecar_service {} 14 | } 15 | } 16 | 17 | task "web" { 18 | driver = "docker" 19 | config { 20 | image = "hashicorpnomad/counter-api:v1" 21 | } 22 | } 23 | } 24 | 25 | group "dashboard" { 26 | network { 27 | mode ="bridge" 28 | port "http" { 29 | static = 9002 30 | to = 9002 31 | } 32 | } 33 | 34 | service { 35 | name = "count-dashboard" 36 | port = "9002" 37 | 38 | connect { 39 | sidecar_service { 40 | proxy { 41 | upstreams { 42 | destination_name = "count-api" 43 | local_bind_port = 8080 44 | } 45 | } 46 | } 47 | } 48 | } 49 | 50 | task "dashboard" { 51 | driver = "docker" 52 | env { 53 | COUNTING_SERVICE_URL = "http://${NOMAD_UPSTREAM_ADDR_count_api}" 54 | } 55 | config { 56 | image = "hashicorpnomad/counter-dashboard:v1" 57 | } 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /jobs/db/cockroach.hcl: -------------------------------------------------------------------------------- 1 | variable "datacenters" { 2 | type = list(string) 3 | default = ["dc1"] 4 | } 5 | 6 | job "cockroach" { 7 | datacenters = var.datacenters 8 | 9 | type = "service" 10 | 11 | update { 12 | max_parallel = 1 13 | stagger = "30s" 14 | min_healthy_time = "30s" 15 | healthy_deadline = "3m" 16 | } 17 | 18 | constraint { 19 | distinct_hosts = true 20 | } 21 | 22 | group "cockroach-1" { 23 | network { 24 | mode = "bridge" 25 | 
port "metrics" {} 26 | } 27 | 28 | service { 29 | name = "cockroach-metrics" 30 | port = "metrics" 31 | connect { 32 | sidecar_service { 33 | proxy { 34 | expose { 35 | path { 36 | path = "/_status/vars" 37 | protocol = "http" 38 | listener_port = "metrics" 39 | local_path_port = 8080 40 | } 41 | } 42 | } 43 | } 44 | } 45 | } 46 | 47 | service { 48 | name = "cockroach" 49 | port = "26258" 50 | 51 | connect { 52 | sidecar_service {} 53 | } 54 | } 55 | 56 | service { 57 | name = "cockroach-1" 58 | port = "26258" 59 | 60 | connect { 61 | sidecar_service { 62 | proxy { 63 | upstreams { 64 | destination_name = "cockroach-2" 65 | local_bind_port = 26259 66 | } 67 | upstreams { 68 | destination_name = "cockroach-3" 69 | local_bind_port = 26260 70 | } 71 | } 72 | } 73 | } 74 | } 75 | 76 | ephemeral_disk { 77 | migrate = true 78 | sticky = true 79 | size = 5000 # 5GB 80 | } 81 | 82 | task "cockroach" { 83 | driver = "docker" 84 | config { 85 | image = "cockroachdb/cockroach:latest" 86 | args = [ 87 | "start", 88 | "--insecure", 89 | "--advertise-addr=localhost:26258", 90 | "--listen-addr=localhost:26258", 91 | "--http-addr=0.0.0.0:8080", 92 | "--join=localhost:26258,localhost:26259,localhost:26260", 93 | "--logtostderr=WARNING", 94 | ] 95 | } 96 | } 97 | } 98 | 99 | group "cockroach-2" { 100 | network { 101 | mode = "bridge" 102 | port "metrics" {} 103 | } 104 | 105 | service { 106 | name = "cockroach-metrics" 107 | port = "metrics" 108 | connect { 109 | sidecar_service { 110 | proxy { 111 | expose { 112 | path { 113 | path = "/_status/vars" 114 | protocol = "http" 115 | listener_port = "metrics" 116 | local_path_port = 8080 117 | } 118 | } 119 | } 120 | } 121 | } 122 | } 123 | 124 | service { 125 | name = "cockroach" 126 | port = "26259" 127 | 128 | connect { 129 | sidecar_service {} 130 | } 131 | } 132 | 133 | service { 134 | name = "cockroach-2" 135 | port = "26259" 136 | 137 | connect { 138 | sidecar_service { 139 | proxy { 140 | upstreams { 141 | destination_name = 
"cockroach-1" 142 | local_bind_port = 26258 143 | } 144 | upstreams { 145 | destination_name = "cockroach-3" 146 | local_bind_port = 26260 147 | } 148 | } 149 | } 150 | } 151 | } 152 | 153 | ephemeral_disk { 154 | migrate = true 155 | sticky = true 156 | size = 5000 # 5GB 157 | } 158 | 159 | task "cockroach" { 160 | driver = "docker" 161 | config { 162 | image = "cockroachdb/cockroach:latest" 163 | args = [ 164 | "start", 165 | "--insecure", 166 | "--advertise-addr=localhost:26259", 167 | "--http-addr=0.0.0.0:8080", 168 | "--listen-addr=localhost:26259", 169 | "--join=localhost:26258,localhost:26259,localhost:26260", 170 | "--logtostderr=WARNING", 171 | ] 172 | } 173 | } 174 | } 175 | 176 | group "cockroach-3" { 177 | network { 178 | mode = "bridge" 179 | port "metrics" {} 180 | } 181 | 182 | service { 183 | name = "cockroach-metrics" 184 | port = "metrics" 185 | connect { 186 | sidecar_service { 187 | proxy { 188 | expose { 189 | path { 190 | path = "/_status/vars" 191 | protocol = "http" 192 | listener_port = "metrics" 193 | local_path_port = 8080 194 | } 195 | } 196 | } 197 | } 198 | } 199 | } 200 | 201 | service { 202 | name = "cockroach" 203 | port = "26260" 204 | 205 | connect { 206 | sidecar_service {} 207 | } 208 | } 209 | 210 | service { 211 | name = "cockroach-3" 212 | port = "26260" 213 | 214 | connect { 215 | sidecar_service { 216 | proxy { 217 | upstreams { 218 | destination_name = "cockroach-1" 219 | local_bind_port = 26258 220 | } 221 | upstreams { 222 | destination_name = "cockroach-2" 223 | local_bind_port = 26259 224 | } 225 | } 226 | } 227 | } 228 | } 229 | 230 | ephemeral_disk { 231 | migrate = true 232 | sticky = true 233 | size = 5000 # 5GB 234 | } 235 | 236 | task "cockroach" { 237 | driver = "docker" 238 | config { 239 | image = "cockroachdb/cockroach:latest" 240 | args = [ 241 | "start", 242 | "--insecure", 243 | "--advertise-addr=localhost:26260", 244 | "--http-addr=0.0.0.0:8080", 245 | "--listen-addr=localhost:26260", 246 | 
"--join=localhost:26258,localhost:26259,localhost:26260", 247 | "--logtostderr=WARNING", 248 | ] 249 | } 250 | } 251 | } 252 | } -------------------------------------------------------------------------------- /jobs/db/timescale.hcl: -------------------------------------------------------------------------------- 1 | // docker run -d --name timescaledb -p 5432:5432 -e POSTGRES_PASSWORD=password timescale/timescaledb:latest-pg12 2 | variable "datacenters" { 3 | type = list(string) 4 | default = ["dc1"] 5 | } 6 | 7 | job "timescaledb" { 8 | datacenters = var.datacenters 9 | 10 | group "timescaledb" { 11 | network { 12 | mode = "bridge" 13 | } 14 | 15 | service { 16 | name = "timescaledb" 17 | port = "5432" 18 | 19 | connect { 20 | sidecar_service {} 21 | } 22 | } 23 | 24 | ephemeral_disk { 25 | size = 10240 # 10 GB 26 | migrate = true 27 | sticky = true 28 | } 29 | 30 | task "timescaledb" { 31 | driver = "docker" 32 | 33 | # Note, configuration is found at: 34 | # /var/lib/postgresql/data/postgresql.conf 35 | 36 | env { 37 | POSTGRES_PASSWORD = "password" 38 | } 39 | 40 | config { 41 | image = "timescale/timescaledb:latest-pg12" 42 | } 43 | } 44 | } 45 | 46 | group "promscale" { 47 | network { 48 | mode = "bridge" 49 | } 50 | 51 | service { 52 | name = "promscale" 53 | port = "9201" 54 | 55 | connect { 56 | sidecar_service { 57 | proxy { 58 | upstreams { 59 | destination_name = "timescaledb" 60 | local_bind_port = 5432 61 | } 62 | } 63 | } 64 | } 65 | } 66 | 67 | ephemeral_disk { 68 | size = 10240 # 10 GB 69 | migrate = true 70 | sticky = true 71 | } 72 | 73 | task "promscale" { 74 | driver = "docker" 75 | 76 | env { 77 | POSTGRES_PASSWORD = "password" 78 | 79 | // PROMSCALE_WEB_TELEMETRY_PATH = "/metrics" 80 | // PROMSCALE_DB_CONNECT_RETRIES = 10 81 | // PROMSCALE_LOG_LEVEL = "info" 82 | // PROMSCALE_DB_NAME = "timescale" 83 | // PROMSCALE_DB_PORT = 5432 84 | // PROMSCALE_DB_SSL_MODE = "allow" 85 | // PROMSCALE_DB_HOST="127.0.0.1" 86 | // PROMSCALE_DB_URI = "" 87 | 
} 88 | 89 | config { 90 | image = "timescale/promscale:latest" 91 | 92 | args = [ 93 | "-db-uri", "postgres://postgres:$POSTGRES_PASSWORD@127.0.0.1:5432/postgres?sslmode=allow", 94 | ] 95 | } 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /jobs/ingress/services.hcl: -------------------------------------------------------------------------------- 1 | consul_services = [ 2 | { 3 | name = "grafana" 4 | port = 3000 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /jobs/ingress/traefik.hcl: -------------------------------------------------------------------------------- 1 | variable "datacenters" { 2 | type = list(string) 3 | default = ["dc1"] 4 | } 5 | 6 | job "ingress" { 7 | datacenters = var.datacenters 8 | type = "system" 9 | 10 | group "traefik" { 11 | count = 1 12 | 13 | network { 14 | mode = "bridge" 15 | port "grafana" { 16 | static = 3000 17 | to = 3000 18 | } 19 | port "dashboard" { 20 | static = 8081 21 | to = 8081 22 | } 23 | port "metrics" { 24 | static = 8082 25 | to = 8082 26 | } 27 | } 28 | 29 | service { 30 | name = "traefik-grafana" 31 | port = "grafana" 32 | 33 | connect { 34 | sidecar_service { 35 | proxy { 36 | upstreams { 37 | destination_name = "grafana" 38 | local_bind_port = 3001 39 | } 40 | } 41 | } 42 | } 43 | } 44 | 45 | task "traefik" { 46 | template { 47 | change_mode = "restart" 48 | destination = "local/traefik.toml" 49 | data = <= 1) ? google_compute_instance.vm[0].network_interface[0].access_config[0].nat_ip : "" 4 | } 5 | 6 | output "internal_ip" { 7 | value = (var.instances >= 1) ? google_compute_instance.vm[0].network_interface[0].network_ip : "" 8 | } 9 | 10 | output "internal_ips" { 11 | value = (var.instances >= 1) ? 
[for i in range(var.instances) : google_compute_instance.vm[i].network_interface[0].network_ip] : [] 12 | } 13 | -------------------------------------------------------------------------------- /modules/vm/vars.tf: -------------------------------------------------------------------------------- 1 | variable "image" { 2 | type = string 3 | } 4 | 5 | variable "ssh_public_key" { 6 | type = string 7 | default = "" 8 | } 9 | 10 | variable "ssh_user" { 11 | type = string 12 | default = "ubuntu" 13 | } 14 | 15 | variable "external_ip" { 16 | type = bool 17 | default = false 18 | } 19 | 20 | variable "name" { 21 | type = string 22 | } 23 | 24 | variable "subnetwork" { 25 | type = string 26 | } 27 | 28 | variable "tags" { 29 | type = list(string) 30 | } 31 | 32 | variable "instances" { 33 | type = number 34 | default = 1 35 | } 36 | 37 | variable "metadata_startup_script" { 38 | type = string 39 | default = "" 40 | } 41 | 42 | variable "region" { 43 | type = string 44 | default = "us-east1" 45 | } 46 | 47 | variable "zone" { 48 | type = string 49 | default = "c" 50 | } 51 | 52 | variable "machine_type" { 53 | type = string 54 | default = "n1-standard-1" 55 | } 56 | 57 | variable "disk_size" { 58 | type = number 59 | default = 50 60 | } 61 | 62 | variable "enable_shielded_vm" { 63 | type = bool 64 | default = true 65 | } 66 | 67 | variable "enable_preemptible" { 68 | type = bool 69 | default = false 70 | } -------------------------------------------------------------------------------- /modules/vm/vm.tf: -------------------------------------------------------------------------------- 1 | resource "google_compute_instance" "vm" { 2 | count = var.instances 3 | name = format("%s-%d", var.name, count.index) 4 | machine_type = var.machine_type 5 | zone = format("%s-%s", var.region, var.zone) 6 | tags = var.tags 7 | 8 | allow_stopping_for_update = true 9 | 10 | boot_disk { 11 | initialize_params { 12 | image = var.image 13 | size = var.disk_size 14 | } 15 | } 16 | 17 | 
network_interface { 18 | subnetwork = var.subnetwork 19 | 20 | # https://github.com/hashicorp/terraform/issues/21717#issuecomment-502148701 21 | dynamic "access_config" { 22 | for_each = var.external_ip ? [{}] : [] 23 | // Ephemeral external IP address 24 | content {} 25 | } 26 | } 27 | 28 | lifecycle { 29 | create_before_destroy = "false" 30 | } 31 | 32 | scheduling { 33 | preemptible = var.enable_preemptible 34 | # scheduling must have automatic_restart be false when preemptible is true. 35 | automatic_restart = ! var.enable_preemptible 36 | } 37 | 38 | dynamic "shielded_instance_config" { 39 | # https://github.com/terraform-google-modules/terraform-google-vm/blob/a3d482fa2f33a61880d3cdfe2e7e86ee6b6597d0/modules/instance_template/main.tf#L51 40 | for_each = var.enable_shielded_vm ? [{}] : [] 41 | content { 42 | enable_secure_boot = false 43 | enable_vtpm = true 44 | enable_integrity_monitoring = true 45 | } 46 | } 47 | 48 | service_account { 49 | # https://developers.google.com/identity/protocols/googlescopes 50 | scopes = [ 51 | "https://www.googleapis.com/auth/compute.readonly", 52 | "https://www.googleapis.com/auth/logging.write", 53 | "https://www.googleapis.com/auth/devstorage.read_only", 54 | "https://www.googleapis.com/auth/monitoring.write", 55 | ] 56 | } 57 | 58 | metadata = { 59 | ssh-keys = format("%s:%s", var.ssh_user, var.ssh_public_key) 60 | startup-script = var.metadata_startup_script 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /mtls-terminating-proxy/README.md: -------------------------------------------------------------------------------- 1 | # `mtls-terminating-proxy` 2 | 3 | ```console 4 | $ go run main.go --lb-ip="$PUBLIC_IP" --ca-file="../nomad-ca.pem" --cert-file="../nomad-cli-cert.pem" --key-file="../nomad-cli-key.pem" 5 | 2020/04/26 23:10:07 Load Balancer IP: "$PUBLIC_IP" 6 | 2020/04/26 23:10:07 Loading the TLS data 7 | 2020/04/26 23:10:07 Starting local listener on localhost:4646 8 
| ``` 9 | -------------------------------------------------------------------------------- /mtls-terminating-proxy/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "io/ioutil" 10 | "log" 11 | "net" 12 | "os" 13 | ) 14 | 15 | var ( 16 | lbIP string 17 | nomadCACert string 18 | nomadCLICert string 19 | nomadCLIKey string 20 | ) 21 | 22 | func errorAndExit(mesg interface{}) { 23 | fmt.Println(mesg) 24 | os.Exit(1) 25 | } 26 | 27 | func readFileContent(file string) string { 28 | f, err := os.Open(file) 29 | if err != nil { 30 | errorAndExit(fmt.Errorf("%s %w", file, err)) 31 | } 32 | defer f.Close() 33 | 34 | bytes, err := ioutil.ReadAll(f) 35 | if err != nil { 36 | errorAndExit(fmt.Errorf("%s %w", file, err)) 37 | } 38 | return string(bytes) 39 | } 40 | 41 | func init() { 42 | var ( 43 | nomadCACertFile string 44 | nomadCLICertFile string 45 | nomadCLIKeyFile string 46 | ) 47 | 48 | flag.StringVar(&lbIP, "lb-ip", "", "internal Nomad server IP") 49 | flag.StringVar(&nomadCACertFile, "ca-file", "", "mTLS certifcate authority file") 50 | flag.StringVar(&nomadCLICertFile, "cert-file", "", "mTLS client cert file") 51 | flag.StringVar(&nomadCLIKeyFile, "key-file", "", "mTLS client key file") 52 | 53 | flag.Parse() 54 | 55 | nomadCACert = readFileContent(nomadCACertFile) 56 | nomadCLICert = readFileContent(nomadCLICertFile) 57 | nomadCLIKey = readFileContent(nomadCLIKeyFile) 58 | 59 | log.Printf("Load Balancer IP: %q", lbIP) 60 | } 61 | 62 | func main() { 63 | log.Println("Loading the TLS data") 64 | nomadCert, err := tls.X509KeyPair([]byte(nomadCLICert), []byte(nomadCLIKey)) 65 | if err != nil { 66 | errorAndExit(err) 67 | } 68 | 69 | // TODO(kent): don't use insecure skip verify... 
70 | // if I use ServerName I get "x509: certificate signed by unknown authority" as an error 71 | tlsClientConfig := &tls.Config{ 72 | Certificates: []tls.Certificate{nomadCert}, 73 | ClientCAs: x509.NewCertPool(), 74 | ClientAuth: tls.RequireAndVerifyClientCert, 75 | MinVersion: tls.VersionTLS12, 76 | // ServerName: "localhost", 77 | InsecureSkipVerify: true, 78 | } 79 | 80 | tlsClientConfig.ClientCAs.AppendCertsFromPEM([]byte(nomadCACert)) 81 | 82 | tlsClientConfig.BuildNameToCertificate() 83 | 84 | log.Println("Starting local listener on localhost:4646") 85 | ln, err := net.Listen("tcp", "localhost:4646") 86 | if err != nil { 87 | errorAndExit(err) 88 | } 89 | for { 90 | conn, err := ln.Accept() 91 | if err != nil { 92 | log.Println(err) 93 | continue 94 | } 95 | 96 | go func(conn net.Conn) { 97 | nomad, err := net.Dial("tcp", fmt.Sprintf("%s:4646", lbIP)) 98 | if err != nil { 99 | log.Println(err) 100 | return 101 | } 102 | 103 | nomadWrap := tls.Client(nomad, tlsClientConfig) 104 | 105 | err = nomadWrap.Handshake() 106 | if err != nil { 107 | log.Println(err) 108 | return 109 | } 110 | 111 | copyConn := func(writer, reader net.Conn) { 112 | defer writer.Close() 113 | defer reader.Close() 114 | io.Copy(writer, reader) 115 | } 116 | 117 | go copyConn(conn, nomadWrap) 118 | go copyConn(nomadWrap, conn) 119 | }(conn) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /network.tf: -------------------------------------------------------------------------------- 1 | module "network" { 2 | source = "./modules/network" 3 | cidr_range = var.cidr_range 4 | region = var.region 5 | } -------------------------------------------------------------------------------- /nomad_tls_ca.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "nomad-ca" { 2 | algorithm = "RSA" 3 | rsa_bits = "2048" 4 | } 5 | 6 | resource "tls_self_signed_cert" "nomad-ca" { 7 | 
is_ca_certificate = true 8 | validity_period_hours = 87600 9 | 10 | private_key_pem = tls_private_key.nomad-ca.private_key_pem 11 | 12 | subject { 13 | common_name = "nomad-ca.local" 14 | organization = var.tls_organization 15 | } 16 | 17 | allowed_uses = [ 18 | "cert_signing", 19 | "digital_signature", 20 | "key_encipherment", 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /nomad_tls_cli.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "nomad-cli" { 2 | algorithm = "RSA" 3 | rsa_bits = "2048" 4 | } 5 | 6 | resource "tls_cert_request" "nomad-cli" { 7 | private_key_pem = tls_private_key.nomad-cli.private_key_pem 8 | 9 | // ip_addresses = [ 10 | // module.load_balancer.external_ip, 11 | // "127.0.0.1", 12 | // ] 13 | 14 | subject { 15 | common_name = "cli.global.nomad" 16 | organization = var.tls_organization 17 | } 18 | } 19 | 20 | resource "tls_locally_signed_cert" "nomad-cli" { 21 | cert_request_pem = tls_cert_request.nomad-cli.cert_request_pem 22 | 23 | ca_private_key_pem = tls_private_key.nomad-ca.private_key_pem 24 | ca_cert_pem = tls_self_signed_cert.nomad-ca.cert_pem 25 | 26 | validity_period_hours = 87600 27 | 28 | allowed_uses = [ 29 | "client_auth", 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /nomad_tls_client.tf: -------------------------------------------------------------------------------- 1 | 2 | resource "tls_private_key" "nomad-client" { 3 | algorithm = "RSA" 4 | rsa_bits = "2048" 5 | } 6 | 7 | resource "tls_cert_request" "nomad-client" { 8 | private_key_pem = tls_private_key.nomad-client.private_key_pem 9 | 10 | ip_addresses = [ 11 | "127.0.0.1", 12 | ] 13 | 14 | dns_names = [ 15 | "localhost", 16 | "client.global.nomad", 17 | ] 18 | 19 | subject { 20 | common_name = "client.global.nomad" 21 | organization = var.tls_organization 22 | } 23 | } 24 | 25 | resource 
"tls_locally_signed_cert" "nomad-client" { 26 | cert_request_pem = tls_cert_request.nomad-client.cert_request_pem 27 | 28 | ca_private_key_pem = tls_private_key.nomad-ca.private_key_pem 29 | ca_cert_pem = tls_self_signed_cert.nomad-ca.cert_pem 30 | 31 | validity_period_hours = var.tls_validity_period_hours 32 | 33 | allowed_uses = [ 34 | "client_auth", 35 | ] 36 | } -------------------------------------------------------------------------------- /nomad_tls_server.tf: -------------------------------------------------------------------------------- 1 | resource "tls_private_key" "nomad-server" { 2 | algorithm = "RSA" 3 | rsa_bits = "2048" 4 | } 5 | 6 | resource "tls_cert_request" "nomad-server" { 7 | private_key_pem = tls_private_key.nomad-server.private_key_pem 8 | 9 | ip_addresses = [ 10 | module.load_balancer.external_ip, 11 | "127.0.0.1", 12 | ] 13 | 14 | dns_names = var.dns_enabled ? [ 15 | "localhost", 16 | "server.global.nomad", 17 | trimsuffix(google_dns_record_set.public.0.name, "."), 18 | ] : [ 19 | "localhost", 20 | "server.global.nomad", 21 | ] 22 | 23 | subject { 24 | common_name = "server.global.nomad" 25 | organization = var.tls_organization 26 | } 27 | } 28 | 29 | resource "tls_locally_signed_cert" "nomad-server" { 30 | cert_request_pem = tls_cert_request.nomad-server.cert_request_pem 31 | 32 | ca_private_key_pem = tls_private_key.nomad-ca.private_key_pem 33 | ca_cert_pem = tls_self_signed_cert.nomad-ca.cert_pem 34 | 35 | validity_period_hours = var.tls_validity_period_hours 36 | 37 | allowed_uses = [ 38 | "server_auth", 39 | "client_auth", 40 | ] 41 | } -------------------------------------------------------------------------------- /outputs.tf: -------------------------------------------------------------------------------- 1 | output "nomad_ca_cert" { 2 | sensitive = true 3 | description = "The TLS CA certificate used for CLI authentication." 
4 | value = tls_self_signed_cert.nomad-ca.cert_pem 5 | } 6 | 7 | output "nomad_cli_cert" { 8 | sensitive = true 9 | description = "The TLS certificate used for CLI authentication." 10 | value = tls_locally_signed_cert.nomad-cli.cert_pem 11 | } 12 | 13 | output "nomad_cli_key" { 14 | sensitive = true 15 | description = "The TLS private key used for CLI authentication." 16 | value = tls_private_key.nomad-cli.private_key_pem 17 | } 18 | 19 | output "consul_ca_cert" { 20 | sensitive = true 21 | description = "The TLS CA certificate used for CLI authentication." 22 | value = tls_self_signed_cert.consul-ca.cert_pem 23 | } 24 | 25 | output "consul_cli_cert" { 26 | sensitive = true 27 | description = "The TLS certificate used for CLI authentication." 28 | value = tls_locally_signed_cert.consul-cli.cert_pem 29 | } 30 | 31 | output "consul_cli_key" { 32 | sensitive = true 33 | description = "The TLS private key used for CLI authentication." 34 | value = tls_private_key.consul-cli.private_key_pem 35 | } 36 | 37 | output "consul_master_token" { 38 | sensitive = true 39 | description = "The Consul master token." 40 | value = random_uuid.consul_master_token.result 41 | } 42 | 43 | output "bastion_ssh_public_key" { 44 | sensitive = true 45 | description = "The SSH bastion public key." 46 | value = tls_private_key.ssh_key.public_key_openssh 47 | } 48 | 49 | output "bastion_ssh_private_key" { 50 | sensitive = true 51 | description = "The SSH bastion private key." 52 | value = tls_private_key.ssh_key.private_key_pem 53 | } 54 | 55 | output "bastion_public_ip" { 56 | description = "The SSH bastion public IP." 57 | value = module.bastion.external_ip 58 | } 59 | 60 | output "server_internal_ip" { 61 | description = "The Nomad/Consul server private IP." 
62 | value = module.server.internal_ip 63 | } 64 | 65 | output "load_balancer_ip" { 66 | description = "The external ip address of the load balancer" 67 | value = module.load_balancer.external_ip 68 | } 69 | 70 | output "grafana_load_balancer_ip" { 71 | description = "The external ip address of the grafana load balancer" 72 | value = module.grafana_load_balancer.external_ip 73 | } 74 | 75 | output "client_internal_ips" { 76 | description = "The Nomad/Consul client private IP addresses." 77 | value = module.client.internal_ips 78 | } 79 | 80 | output "server_internal_ips" { 81 | description = "The Nomad/Consul server private IP addresses." 82 | value = module.server.internal_ips 83 | } 84 | 85 | output "dns_name_servers" { 86 | description = "Delegate your managed_zone to these virtual name servers if DNS is enabled" 87 | value = var.dns_enabled ? google_dns_managed_zone.nomad.0.name_servers : [] 88 | } 89 | 90 | output "dns_url" { 91 | description = "The mTLS enabled public URL using the configured DNS name" 92 | value = (var.dns_enabled) ?
format("https://%s", trimsuffix(google_dns_record_set.public.0.name, ".")) : "" 93 | } -------------------------------------------------------------------------------- /packer/Makefile: -------------------------------------------------------------------------------- 1 | build: 2 | packer build -force template.pkr.hcl 3 | -------------------------------------------------------------------------------- /packer/configs/consul/client.hcl: -------------------------------------------------------------------------------- 1 | datacenter = "dc1" 2 | bind_addr = "0.0.0.0" 3 | data_dir = "/etc/consul.d/data" 4 | primary_datacenter = "dc1" 5 | 6 | advertise_addr = "{PRIVATE-IPV4}" 7 | advertise_addr_wan = "{PRIVATE-IPV4}" 8 | 9 | addresses { 10 | https = "0.0.0.0" 11 | } 12 | 13 | ports { 14 | dns = 8600 15 | http = 8500 16 | https = 8501 17 | grpc = 8502 18 | grpc_tls = 8503 19 | } 20 | 21 | log_level = "DEBUG" 22 | 23 | disable_remote_exec = true 24 | disable_update_check = true 25 | leave_on_terminate = true 26 | 27 | retry_join = ["provider=gce project_name={PROJECT-NAME} tag_value=server"] 28 | 29 | server = false 30 | 31 | acl { 32 | enabled = {ACLs-ENABLED} 33 | default_policy = "{ACLs-DEFAULT-POLICY}" 34 | } 35 | 36 | tls { 37 | defaults { 38 | ca_file = "/etc/consul.d/consul-ca.pem" 39 | cert_file = "/etc/consul.d/client.pem" 40 | key_file = "/etc/consul.d/client-key.pem" 41 | 42 | verify_incoming = true 43 | verify_outgoing = true 44 | } 45 | 46 | internal_rpc { 47 | verify_server_hostname = true 48 | } 49 | } 50 | 51 | encrypt = "{GOSSIP-KEY}" 52 | encrypt_verify_incoming = true 53 | encrypt_verify_outgoing = true 54 | 55 | telemetry { 56 | prometheus_retention_time = "24h" 57 | disable_hostname = true 58 | } 59 | -------------------------------------------------------------------------------- /packer/configs/consul/consul.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="HashiCorp Consul - A 
service mesh solution" 3 | Documentation=https://www.consul.io/docs/ 4 | Requires=network-online.target 5 | After=network-online.target 6 | 7 | [Service] 8 | User=consul 9 | Group=consul 10 | ExecStart=/bin/consul agent -ui -config-file /consul/config/agent.hcl 11 | ExecReload=/bin/consul reload 12 | ExecStop=/bin/consul leave 13 | KillMode=process 14 | Restart=on-failure 15 | LimitNOFILE=65536 16 | 17 | [Install] 18 | WantedBy=default.target -------------------------------------------------------------------------------- /packer/configs/consul/server.hcl: -------------------------------------------------------------------------------- 1 | datacenter = "dc1" 2 | bind_addr = "0.0.0.0" 3 | data_dir = "/etc/consul.d/data" 4 | primary_datacenter = "dc1" 5 | 6 | advertise_addr = "{PRIVATE-IPV4}" 7 | advertise_addr_wan = "{PRIVATE-IPV4}" 8 | 9 | addresses { 10 | https = "0.0.0.0" 11 | } 12 | 13 | ports { 14 | dns = 8600 15 | http = 8500 16 | https = 8501 17 | } 18 | 19 | log_level = "DEBUG" 20 | 21 | server = true 22 | ui = true 23 | disable_remote_exec = true 24 | disable_update_check = true 25 | bootstrap_expect = {NUMBER-OF-SERVERS} 26 | leave_on_terminate = true 27 | 28 | retry_join = ["provider=gce project_name={PROJECT-NAME} tag_value=server"] 29 | 30 | autopilot { 31 | cleanup_dead_servers = true 32 | last_contact_threshold = "200ms" 33 | max_trailing_logs = 250 34 | server_stabilization_time = "10s" 35 | } 36 | 37 | connect { 38 | enabled = true 39 | } 40 | 41 | acl { 42 | enabled = {ACLs-ENABLED} 43 | default_policy = "{ACLs-DEFAULT-POLICY}" 44 | enable_token_persistence = true 45 | tokens { 46 | master = "{CONSUL-TOKEN}" 47 | } 48 | } 49 | 50 | encrypt = "{GOSSIP-KEY}" 51 | encrypt_verify_incoming = true 52 | encrypt_verify_outgoing = true 53 | 54 | tls { 55 | defaults { 56 | ca_file = "/etc/consul.d/consul-ca.pem" 57 | cert_file = "/etc/consul.d/server.pem" 58 | key_file = "/etc/consul.d/server-key.pem" 59 | 60 | verify_incoming = true 61 | verify_outgoing = 
true 62 | } 63 | 64 | internal_rpc { 65 | verify_server_hostname = true 66 | } 67 | } 68 | 69 | auto_encrypt { 70 | allow_tls = true 71 | } 72 | 73 | telemetry { 74 | prometheus_retention_time = "24h" 75 | disable_hostname = true 76 | } 77 | -------------------------------------------------------------------------------- /packer/configs/nomad/client.hcl: -------------------------------------------------------------------------------- 1 | datacenter = "dc1" 2 | log_level = "DEBUG" 3 | data_dir = "/etc/nomad.d/data" 4 | 5 | client { 6 | enabled = true 7 | 8 | server_join { 9 | retry_join = ["provider=gce project_name={PROJECT-NAME} tag_value=server"] 10 | retry_max = 12 11 | retry_interval = "10s" 12 | } 13 | 14 | options { 15 | "driver.docker.enable" = "1" 16 | "driver.whitelist" = "docker" 17 | "user.blacklist" = "root,ubuntu" 18 | // "docker.auth.config" = "/etc/nomad.d/docker_auth_config.json" 19 | // "docker.auth.helper" = "gcr" 20 | } 21 | 22 | meta { 23 | "runtime" = "docker" 24 | } 25 | 26 | host_volume "nomad" { 27 | path = "/etc/nomad.d/data" 28 | } 29 | } 30 | 31 | acl { 32 | enabled = {ACLs-ENABLED} 33 | } 34 | 35 | tls { 36 | http = true 37 | rpc = true 38 | 39 | ca_file = "/etc/nomad.d/nomad-ca.pem" 40 | cert_file = "/etc/nomad.d/client.pem" 41 | key_file = "/etc/nomad.d/client-key.pem" 42 | 43 | verify_server_hostname = true 44 | verify_https_client = true 45 | } 46 | 47 | consul { 48 | ssl = true 49 | verify_ssl = true 50 | address = "127.0.0.1:8501" 51 | ca_file = "/etc/consul.d/consul-ca.pem" 52 | cert_file = "/etc/consul.d/client.pem" 53 | key_file = "/etc/consul.d/client-key.pem" 54 | token = "{CONSUL-TOKEN}" 55 | } 56 | 57 | telemetry { 58 | collection_interval = "5s" 59 | disable_hostname = true 60 | prometheus_metrics = true 61 | publish_allocation_metrics = true 62 | publish_node_metrics = true 63 | } 64 | 65 | plugin "docker" { 66 | config { 67 | endpoint = "unix:///var/run/docker.sock" 68 | 69 | allow_runtimes = ["runc","runsc"] 70 | 71 | 
allow_privileged = false 72 | 73 | // auth { 74 | // config = "/etc/nomad.d/docker_auth_config.json" 75 | // helper = "gcr" 76 | // } 77 | 78 | extra_labels = ["job_name", "job_id", "task_group_name", "task_name", "namespace", "node_name", "node_id"] 79 | 80 | gc { 81 | image = true 82 | image_delay = "3m" 83 | container = true 84 | 85 | dangling_containers { 86 | enabled = true 87 | dry_run = false 88 | period = "5m" 89 | creation_grace = "5m" 90 | } 91 | } 92 | } 93 | } -------------------------------------------------------------------------------- /packer/configs/nomad/docker_auth_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "auths": {}, 3 | "credHelpers": { 4 | "asia.gcr.io": "gcr", 5 | "eu.gcr.io": "gcr", 6 | "gcr.io": "gcr", 7 | "marketplace.gcr.io": "gcr", 8 | "us.gcr.io": "gcr" 9 | } 10 | } -------------------------------------------------------------------------------- /packer/configs/nomad/nomad.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="HashiCorp Nomad - A flexible workload orchestrator solution" 3 | Documentation=https://www.nomadproject.io/docs/ 4 | Requires=network-online.target 5 | After=network-online.target 6 | 7 | [Service] 8 | User=root 9 | Group=root 10 | ExecStart=/bin/nomad agent -config /nomad/config/agent.hcl 11 | KillMode=process 12 | Restart=on-failure 13 | LimitNOFILE=65536 14 | 15 | [Install] 16 | WantedBy=default.target -------------------------------------------------------------------------------- /packer/configs/nomad/server.hcl: -------------------------------------------------------------------------------- 1 | datacenter = "dc1" 2 | bind_addr = "0.0.0.0" 3 | data_dir = "/etc/nomad.d/data" 4 | 5 | leave_on_terminate = true 6 | 7 | advertise { 8 | http = "{PRIVATE-IPV4}" 9 | rpc = "{PRIVATE-IPV4}" 10 | serf = "{PRIVATE-IPV4}" 11 | } 12 | 13 | log_level = "DEBUG" 14 | 15 | server { 16 | enabled = true 
17 | 18 | server_join { 19 | retry_join = ["provider=gce project_name={PROJECT-NAME} tag_value=server"] 20 | retry_max = 12 21 | retry_interval = "10s" 22 | } 23 | 24 | bootstrap_expect = {NUMBER-OF-SERVERS} 25 | 26 | encrypt = "{GOSSIP-KEY}" 27 | } 28 | 29 | acl { 30 | enabled = {ACLs-ENABLED} 31 | } 32 | 33 | tls { 34 | http = true 35 | rpc = true 36 | 37 | ca_file = "/etc/nomad.d/nomad-ca.pem" 38 | cert_file = "/etc/nomad.d/server.pem" 39 | key_file = "/etc/nomad.d/server-key.pem" 40 | 41 | verify_server_hostname = true 42 | verify_https_client = true 43 | } 44 | 45 | consul { 46 | ssl = true 47 | verify_ssl = true 48 | address = "127.0.0.1:8501" 49 | ca_file = "/etc/consul.d/consul-ca.pem" 50 | cert_file = "/etc/consul.d/server.pem" 51 | key_file = "/etc/consul.d/server-key.pem" 52 | token = "{CONSUL-TOKEN}" 53 | } 54 | 55 | telemetry { 56 | collection_interval = "5s" 57 | disable_hostname = true 58 | prometheus_metrics = true 59 | publish_allocation_metrics = true 60 | publish_node_metrics = true 61 | } -------------------------------------------------------------------------------- /packer/scripts/install_cni_plugins.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | mkdir -p /tmp/download-cni 6 | cd /tmp/download-cni 7 | curl -L https://github.com/containernetworking/plugins/releases/download/v0.8.6/cni-plugins-linux-amd64-v0.8.6.tgz -o cni-plugins.tgz 8 | sudo mkdir -p /opt/cni/bin 9 | sudo tar -C /opt/cni/bin -xzf cni-plugins.tgz 10 | rm -rf /tmp/download-cni 11 | 12 | 13 | sudo sysctl -w net.bridge.bridge-nf-call-arptables=1 14 | sudo sysctl -w net.bridge.bridge-nf-call-ip6tables=1 15 | sudo sysctl -w net.bridge.bridge-nf-call-iptables=1 16 | 17 | # echo 1 > /proc/sys/net/bridge/bridge-nf-call-arptables 18 | # echo 1 > /proc/sys/net/bridge/bridge-nf-call-ip6tables 19 | # echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables 20 | cat > /tmp/nomad-cni.conf << EOF 21 | 
net.bridge.bridge-nf-call-arptables=1 22 | net.bridge.bridge-nf-call-ip6tables=1 23 | net.bridge.bridge-nf-call-iptables=1 24 | EOF 25 | sudo chown --recursive root:root /tmp/nomad-cni.conf 26 | sudo mv /tmp/nomad-cni.conf /etc/sysctl.d/nomad-cni.conf -------------------------------------------------------------------------------- /packer/scripts/install_consul.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Download Latest Version of Consul 6 | # https://developer.hashicorp.com/consul/docs/install 7 | sudo apt-get install -y consul 8 | 9 | # Move /tmp/consul-agent.hcl to /etc/consul.d/consul.hcl 10 | sudo mv /tmp/consul-agent.hcl /etc/consul.d/consul.hcl -------------------------------------------------------------------------------- /packer/scripts/install_docker-credential-gcr.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # sudo mv /tmp/docker_auth_config.json /nomad/config 6 | # sudo chown root:root /nomad/config/docker_auth_config.json 7 | # 8 | # VERSION=2.0.0 9 | # OS=linux # or "darwin" for OSX, "windows" for Windows. 10 | # ARCH=amd64 # or "386" for 32-bit OSs, "arm64" for ARM 64. 
11 | # 12 | # curl -fsSL "https://github.com/GoogleCloudPlatform/docker-credential-gcr/releases/download/v${VERSION}/docker-credential-gcr_${OS}_${ARCH}-${VERSION}.tar.gz" \ 13 | # | tar xz --to-stdout ./docker-credential-gcr \ 14 | # > /tmp/docker-credential-gcr 15 | # 16 | # sudo mv /tmp/docker-credential-gcr /usr/local/bin/ 17 | # sudo chmod +x /usr/local/bin/docker-credential-gcr -------------------------------------------------------------------------------- /packer/scripts/install_docker.sh: -------------------------------------------------------------------------------- 1 | 2 | #!/bin/bash 3 | 4 | set -ex 5 | 6 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - 7 | sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" 8 | sudo DEBIAN_FRONTEND=noninteractive apt-get update -y 9 | sudo DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce 10 | sudo systemctl enable docker -------------------------------------------------------------------------------- /packer/scripts/install_falco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # https://falco.org/docs/getting-started/falco-linux-quickstart/#install-falco 6 | curl -fsSL https://falco.org/repo/falcosecurity-packages.asc | sudo gpg --dearmor -o /usr/share/keyrings/falco-archive-keyring.gpg 7 | sudo bash -c 'cat << EOF > /etc/apt/sources.list.d/falcosecurity.list 8 | deb [signed-by=/usr/share/keyrings/falco-archive-keyring.gpg] https://download.falco.org/packages/deb stable main 9 | EOF' 10 | 11 | sudo apt-get update -y 12 | 13 | sudo apt-get -y install linux-headers-$(uname -r) 14 | sudo apt-get install -y falco -------------------------------------------------------------------------------- /packer/scripts/install_gvisor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # 
https://gvisor.dev/docs/user_guide/install/ 6 | ARCH=$(uname -m) 7 | URL=https://storage.googleapis.com/gvisor/releases/release/latest/${ARCH} 8 | wget ${URL}/runsc ${URL}/runsc.sha512 \ 9 | ${URL}/containerd-shim-runsc-v1 ${URL}/containerd-shim-runsc-v1.sha512 10 | sha512sum -c runsc.sha512 \ 11 | -c containerd-shim-runsc-v1.sha512 12 | rm -f *.sha512 13 | chmod a+rx runsc containerd-shim-runsc-v1 14 | sudo mv runsc containerd-shim-runsc-v1 /usr/local/bin 15 | 16 | # install as a docker runtime 17 | sudo /usr/local/bin/runsc install 18 | 19 | # sudo systemctl reload docker 20 | # docker run --rm --runtime=runsc hello-world -------------------------------------------------------------------------------- /packer/scripts/install_hashicorp_apt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg 4 | 5 | echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list 6 | 7 | sudo apt-get update -y -------------------------------------------------------------------------------- /packer/scripts/install_nomad.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Download Latest Version of Nomad 6 | # https://developer.hashicorp.com/nomad/docs/install 7 | sudo apt-get install -y nomad 8 | 9 | # Move /tmp/nomad.hcl to /etc/nomad.d/nomad.hcl 10 | sudo mv /tmp/nomad-agent.hcl /etc/nomad.d/nomad.hcl -------------------------------------------------------------------------------- /packer/scripts/install_required_packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # https://github.com/hashicorp/packer/issues/2639 6 | timeout 180 
/usr/bin/cloud-init status --wait 7 | 8 | sudo apt-get update -y 9 | sudo apt-get install -y \ 10 | apt-transport-https \ 11 | ca-certificates \ 12 | unzip \ 13 | curl \ 14 | wget \ 15 | gpg \ 16 | coreutils \ 17 | gnupg-agent \ 18 | software-properties-common \ 19 | jq -------------------------------------------------------------------------------- /packer/scripts/install_stack_driver_agents.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Install monitoring agent 6 | curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh 7 | sudo bash add-google-cloud-ops-agent-repo.sh --also-install 8 | -------------------------------------------------------------------------------- /packer/template.pkr.hcl: -------------------------------------------------------------------------------- 1 | packer { 2 | required_plugins { 3 | googlecompute = { 4 | source = "github.com/hashicorp/googlecompute" 5 | version = "~> 1" 6 | } 7 | } 8 | } 9 | 10 | variable "account_file" { 11 | type = string 12 | default = "${env("GOOGLE_APPLICATION_CREDENTIALS")}" 13 | } 14 | 15 | variable "disk_size_gb" { 16 | type = string 17 | default = "10" 18 | } 19 | 20 | variable "network" { 21 | type = string 22 | default = "default" 23 | } 24 | 25 | variable "project" { 26 | type = string 27 | default = "${env("GOOGLE_PROJECT")}" 28 | } 29 | 30 | variable "source_image_family" { 31 | type = string 32 | default = "ubuntu-2004-lts" 33 | } 34 | 35 | variable "subnetwork" { 36 | type = string 37 | default = "" 38 | } 39 | 40 | variable "use_iap" { 41 | type = string 42 | default = "false" 43 | } 44 | 45 | variable "use_preemptible" { 46 | type = string 47 | default = "false" 48 | } 49 | 50 | variable "zone" { 51 | type = string 52 | default = "us-east1-b" 53 | } 54 | 55 | source "googlecompute" "bastion" { 56 | account_file = "${var.account_file}" 57 | disk_size = "${var.disk_size_gb}" 58 | image_description = 
"nomad bastion image" 59 | image_name = "bastion" 60 | machine_type = "n1-standard-1" 61 | network = "${var.network}" 62 | preemptible = "${var.use_preemptible}" 63 | project_id = "${var.project}" 64 | source_image_family = "${var.source_image_family}" 65 | ssh_username = "ubuntu" 66 | state_timeout = "15m" 67 | subnetwork = "${var.subnetwork}" 68 | use_iap = "${var.use_iap}" 69 | zone = "${var.zone}" 70 | } 71 | 72 | source "googlecompute" "client" { 73 | account_file = "${var.account_file}" 74 | disk_size = "${var.disk_size_gb}" 75 | image_description = "HashiCorp Nomad and Consul client image" 76 | image_name = "client" 77 | machine_type = "n1-standard-1" 78 | network = "${var.network}" 79 | preemptible = "${var.use_preemptible}" 80 | project_id = "${var.project}" 81 | source_image_family = "${var.source_image_family}" 82 | ssh_username = "ubuntu" 83 | state_timeout = "15m" 84 | subnetwork = "${var.subnetwork}" 85 | use_iap = "${var.use_iap}" 86 | zone = "${var.zone}" 87 | } 88 | 89 | source "googlecompute" "server" { 90 | account_file = "${var.account_file}" 91 | disk_size = "${var.disk_size_gb}" 92 | image_description = "HashiCorp Nomad and Consul server image" 93 | image_name = "server" 94 | machine_type = "n1-standard-1" 95 | network = "${var.network}" 96 | preemptible = "${var.use_preemptible}" 97 | project_id = "${var.project}" 98 | source_image_family = "${var.source_image_family}" 99 | ssh_username = "ubuntu" 100 | state_timeout = "15m" 101 | subnetwork = "${var.subnetwork}" 102 | use_iap = "${var.use_iap}" 103 | zone = "${var.zone}" 104 | } 105 | 106 | build { 107 | sources = [ 108 | "source.googlecompute.bastion", 109 | "source.googlecompute.client", 110 | "source.googlecompute.server", 111 | ] 112 | 113 | provisioner "file" { 114 | destination = "/tmp/nomad-agent.hcl" 115 | only = ["googlecompute.server"] 116 | source = "configs/nomad/server.hcl" 117 | } 118 | 119 | provisioner "file" { 120 | destination = "/tmp/consul-agent.hcl" 121 | only = 
["googlecompute.server"] 122 | source = "configs/consul/server.hcl" 123 | } 124 | 125 | provisioner "file" { 126 | destination = "/tmp/nomad-agent.hcl" 127 | only = ["googlecompute.client"] 128 | source = "configs/nomad/client.hcl" 129 | } 130 | 131 | provisioner "file" { 132 | destination = "/tmp/consul-agent.hcl" 133 | only = ["googlecompute.client"] 134 | source = "configs/consul/client.hcl" 135 | } 136 | 137 | provisioner "file" { 138 | destination = "/tmp/docker_auth_config.json" 139 | only = ["googlecompute.client"] 140 | source = "configs/nomad/docker_auth_config.json" 141 | } 142 | 143 | provisioner "file" { 144 | destination = "/tmp/nomad.service" 145 | only = ["googlecompute.server", "googlecompute.client"] 146 | source = "configs/nomad/nomad.service" 147 | } 148 | 149 | provisioner "file" { 150 | destination = "/tmp/consul.service" 151 | only = ["googlecompute.server", "googlecompute.client"] 152 | source = "configs/consul/consul.service" 153 | } 154 | 155 | provisioner "shell" { 156 | scripts = ["scripts/install_required_packages.sh"] 157 | } 158 | 159 | provisioner "shell" { 160 | only = ["googlecompute.client"] 161 | scripts = ["scripts/install_docker.sh", "scripts/install_gvisor.sh"] 162 | } 163 | 164 | provisioner "shell" { 165 | only = ["googlecompute.client"] 166 | scripts = ["scripts/install_cni_plugins.sh"] 167 | } 168 | 169 | provisioner "shell" { 170 | only = ["googlecompute.server", "googlecompute.client"] 171 | scripts = ["scripts/install_hashicorp_apt.sh", "scripts/install_nomad.sh", "scripts/install_consul.sh"] 172 | } 173 | 174 | provisioner "shell" { 175 | scripts = ["scripts/install_stack_driver_agents.sh", "scripts/install_falco.sh"] 176 | } 177 | 178 | provisioner "shell" { 179 | only = ["googlecompute.client"] 180 | scripts = ["scripts/install_docker-credential-gcr.sh"] 181 | } 182 | 183 | provisioner "shell" { 184 | inline = ["curl https://releases.hashicorp.com/nomad/1.6.1/nomad_1.6.1_linux_amd64.zip -o nomad.zip", "unzip 
nomad.zip", "sudo mv nomad /bin", "rm nomad.zip"] 185 | only = ["googlecompute.bastion"] 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /providers.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | tls = { 4 | version = ">= 4.0.4" 5 | source = "hashicorp/tls" 6 | } 7 | 8 | local = { 9 | version = ">= 2.3.0" 10 | source = "hashicorp/local" 11 | } 12 | 13 | google = { 14 | version = ">= 4.52.0" 15 | source = "hashicorp/google" 16 | } 17 | 18 | random = { 19 | version = ">= 3.4.3" 20 | source = "hashicorp/random" 21 | } 22 | } 23 | } -------------------------------------------------------------------------------- /setup_gcp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Check if the gcloud command-line tool is installed 4 | if ! command -v gcloud; then 5 | echo "Install the Google Cloud SDK before using this script:" 6 | echo "https://cloud.google.com/sdk/" 7 | exit 1 8 | fi 9 | 10 | # Check if a project name was given at the command-line as the first argument 11 | if [ $# -eq 0 ];then 12 | echo "No project name given" 13 | exit 1 14 | fi 15 | 16 | # If no google organization is set, determine it. 17 | if [ -z "$GOOGLE_ORGANIZATION" ]; then 18 | # Attempt to auto-determine the organization, if there is only 1 19 | if [ `gcloud organizations list | grep -v "DISPLAY_NAME" | wc -l` = 1 ]; then 20 | GOOGLE_ORGANIZATION=`gcloud organizations list | grep -v "DISPLAY_NAME" | awk '{print $2}'` 21 | echo "Automatically determined organization $GOOGLE_ORGANIZATION" 22 | else 23 | if gcloud organizations list 2>&1 | grep -v "Listed 0 items"; then 24 | # if there are no organizations, then don't fail, since it's not possible to correct 25 | echo "No organization found. Skipping to next step." 
26 | else 27 | gcloud organizations list 28 | echo -e "\nFrom the list above, choose the correct organization ID and set it as the GOOGLE_ORGANIZATION environment variable to continue!\n" 29 | exit 1 30 | fi 31 | fi 32 | fi 33 | 34 | # If no google billing account is set, determine it. 35 | if [ -z "$GOOGLE_BILLING_ACCOUNT" ]; then 36 | # Attempt to auto-determine the billing account, if there is only 1 37 | if [ `gcloud alpha billing accounts list | grep -v "ACCOUNT_ID" | wc -l` = 1 ]; then 38 | GOOGLE_BILLING_ACCOUNT=`gcloud alpha billing accounts list | grep -v "ACCOUNT_ID" | awk '{print $1}'` 39 | echo "Automatically determined billing account $GOOGLE_BILLING_ACCOUNT" 40 | else 41 | gcloud alpha billing accounts list 42 | echo -e "\nFrom the list above, choose the correct billing account ID and set it as the GOOGLE_BILLING_ACCOUNT environment variable to continue!\n" 43 | exit 1 44 | fi 45 | fi 46 | 47 | # Skip project creation if it already exists and we're just bootstrapping it. 48 | if gcloud projects list | grep -v "PROJECT_ID" | grep -q "$1"; then 49 | # we good 50 | echo -e "Project '$1' already exists, skipping creation!" 51 | else 52 | # create the project 53 | gcloud projects create "$1" --organization="$GOOGLE_ORGANIZATION" 54 | # now we good 55 | fi 56 | 57 | # Set gcloud config to use the given project 58 | gcloud config set project "$1" 59 | 60 | # Skip billing account linking if it already exists and we're just bootstrapping it. 61 | if gcloud alpha billing projects list --billing-account "$GOOGLE_BILLING_ACCOUNT" | grep -v "PROJECT_ID" | grep -q "$1"; then 62 | # we good 63 | echo -e "Project '$1' billing already exists, skipping linking!"
64 | else 65 | echo "Setting up '$1' with billing account $GOOGLE_BILLING_ACCOUNT" 66 | # create the project 67 | gcloud alpha billing projects link "$1" --billing-account "$GOOGLE_BILLING_ACCOUNT" 68 | # now we good 69 | fi 70 | 71 | echo "Enabling compute engine API for project" 72 | # Enable the compute engine API 73 | gcloud services enable compute.googleapis.com 74 | 75 | echo "Enabling container registry API for project" 76 | # Enable the container registry API 77 | gcloud services enable containerregistry.googleapis.com 78 | 79 | echo "Creating the Terraform service account" 80 | # Create the service account with account.json file if it doesn't exist 81 | gcloud iam service-accounts create terraform \ 82 | --display-name "Terraform Service Account" \ 83 | --description "Service account to use with Terraform" 84 | 85 | echo "Adding the required IAM policy binding for the Terraform service account" 86 | gcloud projects add-iam-policy-binding "$1" \ 87 | --member serviceAccount:"terraform@$1.iam.gserviceaccount.com" \ 88 | --role roles/editor 89 | 90 | echo "Creating the required IAM service policy key 'account.json'" 91 | gcloud iam service-accounts keys create account.json \ 92 | --iam-account "terraform@$1.iam.gserviceaccount.com" 93 | -------------------------------------------------------------------------------- /ssh-mtls-terminating-proxy.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "encoding/json" 7 | "encoding/pem" 8 | "errors" 9 | "flag" 10 | "fmt" 11 | "io" 12 | "io/ioutil" 13 | "log" 14 | "net" 15 | "os" 16 | "os/exec" 17 | "sync" 18 | 19 | "golang.org/x/crypto/ssh" 20 | "golang.org/x/crypto/ssh/agent" 21 | ) 22 | 23 | type terraformOutputNode struct { 24 | Sensitive bool `json:"sensitive"` 25 | Type string `json:"type"` 26 | Value string `json:"value"` 27 | } 28 | 29 | type terraformOutput struct { 30 | BastionPublicIP terraformOutputNode
`json:"bastion_public_ip"`
	BastionSSHPrivateKey terraformOutputNode `json:"bastion_ssh_private_key"`
	BastionSSHPublicKey  terraformOutputNode `json:"bastion_ssh_public_key"`
	ServerInternalIP     terraformOutputNode `json:"server_internal_ip"`
	NomadCACert          terraformOutputNode `json:"nomad_ca_cert"`
	NomadCLICert         terraformOutputNode `json:"nomad_cli_cert"`
	NomadCLIKey          terraformOutputNode `json:"nomad_cli_key"`
	ConsulCACert         terraformOutputNode `json:"consul_ca_cert"`
	ConsulCLICert        terraformOutputNode `json:"consul_cli_cert"`
	ConsulCLIKey         terraformOutputNode `json:"consul_cli_key"`
}

// getTerraformOutput runs `terraform output -json` in the current working
// directory and unmarshals the result. On command failure the combined
// stdout/stderr is printed to aid debugging before the error is returned.
func getTerraformOutput() (*terraformOutput, error) {
	cmd := exec.Command("terraform", "output", "-json")
	output, err := cmd.CombinedOutput()
	if err != nil {
		fmt.Println(string(output))
		return nil, err
	}

	var tfOutput terraformOutput
	if err := json.Unmarshal(output, &tfOutput); err != nil {
		return nil, err
	}

	return &tfOutput, nil
}

// Connection material, populated by init either from `terraform output`
// (when no CLI arguments are given) or from command-line flags.
var (
	serverInternalIP  string
	bastionExternalIP string
	bastionSSHPrivKey string
	nomadCACert       string
	nomadCLICert      string
	nomadCLIKey       string
	consulCACert      string
	consulCLICert     string
	consulCLIKey      string
)

// errorAndExit prints mesg and terminates the process with exit status 1.
func errorAndExit(mesg interface{}) {
	fmt.Println(mesg)
	os.Exit(1)
}

// readFileContent returns the contents of file as a string, exiting the
// process on any error.
func readFileContent(file string) string {
	bytes, err := ioutil.ReadFile(file)
	if err != nil {
		errorAndExit(err)
	}
	return string(bytes)
}

func init() {
	if len(os.Args) <= 1 {
		// No arguments: read everything from the local Terraform state.
		log.Println("getting terraform output")
		tfOutput, err := getTerraformOutput()
		if err != nil {
			errorAndExit(err)
		}

		bastionExternalIP = tfOutput.BastionPublicIP.Value
		serverInternalIP = tfOutput.ServerInternalIP.Value
		bastionSSHPrivKey = tfOutput.BastionSSHPrivateKey.Value
		nomadCACert = tfOutput.NomadCACert.Value
		nomadCLICert = tfOutput.NomadCLICert.Value
		nomadCLIKey = tfOutput.NomadCLIKey.Value
		consulCACert = tfOutput.ConsulCACert.Value
		consulCLICert = tfOutput.ConsulCLICert.Value
		consulCLIKey = tfOutput.ConsulCLIKey.Value
	} else {
		var (
			nomadBastionSSHFile string
			nomadCACertFile     string
			nomadCLICertFile    string
			nomadCLIKeyFile     string
			consulCACertFile    string
			consulCLICertFile   string
			consulCLIKeyFile    string
		)

		flag.StringVar(&serverInternalIP, "server-ip", "", "internal Nomad server IP")
		flag.StringVar(&bastionExternalIP, "bastion-ip", "", "external Nomad bastion IP")
		flag.StringVar(&nomadBastionSSHFile, "bastion-ssh-file", "", "ssh key file")
		flag.StringVar(&nomadCACertFile, "ca-file", "", "mtls certificate authority file")
		flag.StringVar(&nomadCLICertFile, "cert-file", "", "mtls client cert file")
		flag.StringVar(&nomadCLIKeyFile, "key-file", "", "mtls client key file")
		// The Consul material could previously not be supplied in flag mode
		// at all, which made the Consul tunnel unusable there (empty certs
		// fail TLS setup). These optional flags are backward compatible.
		flag.StringVar(&consulCACertFile, "consul-ca-file", "", "mtls certificate authority file for Consul")
		flag.StringVar(&consulCLICertFile, "consul-cert-file", "", "mtls client cert file for Consul")
		flag.StringVar(&consulCLIKeyFile, "consul-key-file", "", "mtls client key file for Consul")

		flag.Parse()

		bastionSSHPrivKey = readFileContent(nomadBastionSSHFile)
		nomadCACert = readFileContent(nomadCACertFile)
		nomadCLICert = readFileContent(nomadCLICertFile)
		nomadCLIKey = readFileContent(nomadCLIKeyFile)
		if consulCACertFile != "" {
			consulCACert = readFileContent(consulCACertFile)
			consulCLICert = readFileContent(consulCLICertFile)
			consulCLIKey = readFileContent(consulCLIKeyFile)
		}
	}

	log.Printf("Bastion IP: %q", bastionExternalIP)
	log.Printf("Server IP: %q", serverInternalIP)
}

// sshAgent adds the PEM-encoded private key privPEM to the running SSH
// agent (via SSH_AUTH_SOCK) and returns an AuthMethod backed by that agent.
// NOTE(review): assumes an RSA (PKCS#1) key, which matches the RSA key
// generated in ssh.tf — confirm if the key generation ever changes.
func sshAgent(privPEM string) (ssh.AuthMethod, error) {
	block, _ := pem.Decode([]byte(privPEM))
	if block == nil {
		return nil, fmt.Errorf("failed to parse PEM block containing the key")
	}

	pk, err := x509.ParsePKCS1PrivateKey(block.Bytes)
	if err != nil {
		return nil, fmt.Errorf("failed to parse PKCS1 private key: %w", err)
	}

	ak := agent.AddedKey{
		PrivateKey: pk,
	}

	sshAgent, err := net.Dial("unix", os.Getenv("SSH_AUTH_SOCK"))
	if err != nil {
		return nil, fmt.Errorf("failed to connect to SSH_AUTH_SOCK: %w", err)
	}

	c := agent.NewClient(sshAgent)

	if err := c.Add(ak); err != nil {
		return nil, fmt.Errorf("failed to add key to agent: %w", err)
	}

	return ssh.PublicKeysCallback(c.Signers), nil
}

// tunnelService listens for plaintext TCP connections on localAddr and
// forwards each one, wrapped in mTLS, to remoteAddr dialed through the
// shared SSH client. It never returns: fatal setup errors exit the process,
// per-connection errors are logged and skipped. This replaces two nearly
// identical inline goroutine bodies (one for Nomad, one for Consul).
func tunnelService(tclient *ssh.Client, name, localAddr, remoteAddr, dnsName, caCert, cliCert, cliKey string) {
	log.Printf("loading %s TLS data", name)
	cert, err := tls.X509KeyPair([]byte(cliCert), []byte(cliKey))
	if err != nil {
		errorAndExit(err)
	}

	pool := x509.NewCertPool()
	if !pool.AppendCertsFromPEM([]byte(caCert)) {
		errorAndExit(errors.New("failed to load " + name + " CA cert from PEM"))
	}

	tlsVerifyOpts := x509.VerifyOptions{
		Roots:   pool,
		DNSName: dnsName,
	}

	tlsClientConfig := &tls.Config{
		Certificates:       []tls.Certificate{cert},
		MinVersion:         tls.VersionTLS12,
		InsecureSkipVerify: true, // required for the custom mTLS verification below
		VerifyPeerCertificate: func(rawCerts [][]byte, _ [][]*x509.Certificate) error {
			// NOTE(review): assumes the server presents exactly one
			// certificate (no intermediates) — matches the single
			// locally-signed cert setup in this repository.
			if len(rawCerts) != 1 {
				return fmt.Errorf("custom verification expected 1 cert during peer verification from server, found %d", len(rawCerts))
			}
			peerCert, err := x509.ParseCertificate(rawCerts[0])
			if err != nil {
				return fmt.Errorf("failed to parse peer certificate: %w", err)
			}
			if _, err := peerCert.Verify(tlsVerifyOpts); err != nil {
				return fmt.Errorf("failed to verify peer certificate: %w", err)
			}
			return nil
		},
	}

	log.Printf("starting %s local listener on %s", name, localAddr)
	ln, err := net.Listen("tcp", localAddr)
	if err != nil {
		errorAndExit(err)
	}
	for {
		conn, err := ln.Accept()
		if err != nil {
			log.Println(fmt.Errorf("failed to accept connection on local listener: %v: %w", ln.Addr(), err))
			continue
		}

		go func(conn net.Conn) {
			remote, err := tclient.Dial("tcp", remoteAddr)
			if err != nil {
				log.Println(fmt.Errorf("failed to connect to %s server over SSH: %w", name, err))
				conn.Close()
				return
			}

			remoteTLS := tls.Client(remote, tlsClientConfig)

			if err := remoteTLS.Handshake(); err != nil {
				log.Println(fmt.Errorf("TLS handshake error to %s server over ssh: %w", name, err))
				conn.Close()
				remote.Close()
				return
			}

			copyConn := func(writer, reader net.Conn) {
				defer writer.Close()
				defer reader.Close()
				io.Copy(writer, reader)
			}

			go copyConn(conn, remoteTLS)
			go copyConn(remoteTLS, conn)
		}(conn)
	}
}

func main() {
	log.Println("Setting up SSH agent")
	sshAuth, err := sshAgent(bastionSSHPrivKey)
	if err != nil {
		errorAndExit(err)
	}

	sshConfig := &ssh.ClientConfig{
		User: "ubuntu",
		Auth: []ssh.AuthMethod{
			sshAuth,
		},
		// TODO(kent): don't always use insecure ignore host key...
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	}

	log.Println("connecting to the bastion")
	conn, err := ssh.Dial("tcp", fmt.Sprintf("%s:22", bastionExternalIP), sshConfig)
	if err != nil {
		errorAndExit(err)
	}
	defer conn.Close()

	log.Println("connecting to the server through the bastion")
	tconn, err := conn.Dial("tcp", fmt.Sprintf("%s:22", serverInternalIP))
	if err != nil {
		errorAndExit(err)
	}
	defer tconn.Close()

	log.Println("wrapping the server connection with SSH through the bastion")
	stconn, chans, reqs, err := ssh.NewClientConn(tconn, fmt.Sprintf("%s:22", serverInternalIP), sshConfig)
	if err != nil {
		errorAndExit(err)
	}

	// A single SSH client multiplexes both tunnels. (The previous code
	// constructed two ssh.NewClient values over the same chans/reqs
	// channels, which races on channel consumption.)
	tclient := ssh.NewClient(stconn, chans, reqs)
	defer tclient.Close()

	var wg sync.WaitGroup
	wg.Add(2)

	go func() {
		defer wg.Done()
		log.Println("tunneling a new connection for Nomad to the server with SSH through the bastion")
		tunnelService(tclient, "Nomad", "localhost:4646", "0.0.0.0:4646", "server.global.nomad", nomadCACert, nomadCLICert, nomadCLIKey)
	}()

	go func() {
		defer wg.Done()
		log.Println("tunneling a new connection for Consul to the server with SSH through the bastion")
		tunnelService(tclient, "Consul", "localhost:8500", "0.0.0.0:8501", "server.dc1.consul", consulCACert, consulCLICert, consulCLIKey)
	}()

	wg.Wait()
}
--------------------------------------------------------------------------------
/ssh.tf:
--------------------------------------------------------------------------------
resource "tls_private_key" "ssh_key" {
  algorithm = "RSA"
  rsa_bits  = "2048"
}

resource "local_file" "ssh_public_key" {
  count = var.save_ssh_keypair_locally ? 1 : 0

  content         = tls_private_key.ssh_key.public_key_openssh
  filename        = "bastion.pub"
  file_permission = "0600"
}

resource "local_file" "ssh_private_key" {
  count = var.save_ssh_keypair_locally ?
1 : 0

  content         = tls_private_key.ssh_key.private_key_pem
  filename        = "bastion"
  file_permission = "0600"
}
--------------------------------------------------------------------------------
/templates/client.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# CONSUL CONFIGURATION

# Install the Consul CA certificate.
cat > /tmp/consul-ca.pem << EOF
${consul_ca_cert}
EOF
sudo mv /tmp/consul-ca.pem /etc/consul.d/consul-ca.pem

# Install the Consul client certificate.
cat > /tmp/client.pem << EOF
${consul_client_cert}
EOF
sudo mv /tmp/client.pem /etc/consul.d/client.pem

# Install the Consul client private key.
cat > /tmp/client-key.pem << EOF
${consul_client_private_key}
EOF
sudo mv /tmp/client-key.pem /etc/consul.d/client-key.pem

# Substitute whether ACLs are enabled.
sed -i -e "s/{ACLs-ENABLED}/${consul_acls_enabled}/g" /etc/consul.d/consul.hcl

# Substitute the default ACL policy.
sed -i -e "s/{ACLs-DEFAULT-POLICY}/${consul_acls_default_policy}/g" /etc/consul.d/consul.hcl

# Set the ACL master token if ACLs are enabled; otherwise blank the
# placeholder. NOTE: an unconditional substitution used to run before this
# conditional, so the token was written even when ACLs were disabled (and
# its comment was a wrong "{ACLs-ENABLED}" copy-paste); it has been removed.
if [ "${consul_acls_enabled}" = "true" ]; then
    sed -i -e "s/{CONSUL-TOKEN}/${consul_master_token}/g" /etc/nomad.d/nomad.hcl
    sed -i -e "s/{CONSUL-TOKEN}/${consul_master_token}/g" /etc/consul.d/consul.hcl
else
    sed -i -e "s/{CONSUL-TOKEN}//g" /etc/nomad.d/nomad.hcl
    sed -i -e "s/{CONSUL-TOKEN}//g" /etc/consul.d/consul.hcl
fi

# Substitute the GCP project name.
sed -i -e "s/{PROJECT-NAME}/${project}/g" /etc/consul.d/consul.hcl

# Substitute this instance's private IPv4 address, fetched from the GCE
# metadata service. ($${IP} is escaped so Terraform leaves it for the shell.)
IP=$(curl -H "Metadata-Flavor: Google" http://metadata/computeMetadata/v1/instance/network-interfaces/0/ip)
sed -i -e "s/{PRIVATE-IPV4}/$${IP}/g" /etc/consul.d/consul.hcl

# Substitute the gossip encryption key (pre-escaped for sed by Terraform).
sed -i -e "s/{GOSSIP-KEY}/${consul_gossip_secret_key}/g" /etc/consul.d/consul.hcl

# Start and enable Consul
systemctl start consul
systemctl enable consul

# NOMAD CONFIGURATION

# Install the Nomad CA certificate.
cat > /tmp/nomad-ca.pem << EOF
${nomad_ca_cert}
EOF
sudo mv /tmp/nomad-ca.pem /etc/nomad.d/nomad-ca.pem

# Install the Nomad client certificate.
cat > /tmp/client.pem << EOF
${nomad_client_cert}
EOF
sudo mv /tmp/client.pem /etc/nomad.d/client.pem

# Install the Nomad client private key.
cat > /tmp/client-key.pem << EOF
${nomad_client_private_key}
EOF
sudo mv /tmp/client-key.pem /etc/nomad.d/client-key.pem

# Substitute whether ACLs are enabled.
sed -i -e "s/{ACLs-ENABLED}/${nomad_acls_enabled}/g" /etc/nomad.d/nomad.hcl

# Substitute the GCP project name.
sed -i -e "s/{PROJECT-NAME}/${project}/g" /etc/nomad.d/nomad.hcl

# Configure the Docker daemon.
cat > /tmp/daemon.json << EOF
${docker_config}
EOF
sudo mv /tmp/daemon.json /etc/docker/daemon.json

# Restart docker to apply changes
systemctl restart docker

# Start and enable Nomad
systemctl start nomad
systemctl enable nomad

# Block access to the metadata endpoint in four easy steps
# https://github.com/picatz/terraform-google-nomad/issues/19
#
# 1. Create the NOMAD-ADMIN chain
sudo iptables --new NOMAD-ADMIN
# 2. Add default rule (this is appended by Nomad by default to the end of the chain as well... maybe not needed?)
sudo iptables --append NOMAD-ADMIN --destination 172.26.64.0/20 --jump ACCEPT
# 3. Allow access to metadata endpoint for DNS resolution (UDP only)
sudo iptables --append NOMAD-ADMIN --destination 169.254.169.254/32 --protocol udp --dport 53 --jump ACCEPT
# 4. Block access to metadata endpoint
sudo iptables --append NOMAD-ADMIN --destination 169.254.169.254/32 --jump DROP
--------------------------------------------------------------------------------
/templates/server.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# NOTE: the shebang must be the very first line of the rendered startup
# script; a leading blank line used to precede it and has been removed.

# CONSUL CONFIGURATION

# Install the Consul CA certificate.
cat > /tmp/consul-ca.pem << EOF
${consul_ca_cert}
EOF
sudo mv /tmp/consul-ca.pem /etc/consul.d/consul-ca.pem

# Install the Consul server certificate.
cat > /tmp/server.pem << EOF
${consul_server_cert}
EOF
sudo mv /tmp/server.pem /etc/consul.d/server.pem

# Install the Consul server private key.
cat > /tmp/server-key.pem << EOF
${consul_server_private_key}
EOF
sudo mv /tmp/server-key.pem /etc/consul.d/server-key.pem

# Substitute the expected server count for cluster bootstrap.
sed -i -e "s/{NUMBER-OF-SERVERS}/${number_of_servers}/g" /etc/consul.d/consul.hcl

# Substitute the gossip encryption key (pre-escaped for sed by Terraform).
sed -i -e "s/{GOSSIP-KEY}/${consul_gossip_secret_key}/g" /etc/consul.d/consul.hcl

# Substitute the GCP project name.
sed -i -e "s/{PROJECT-NAME}/${project}/g" /etc/consul.d/consul.hcl

# Substitute whether ACLs are enabled.
sed -i -e "s/{ACLs-ENABLED}/${consul_acls_enabled}/g" /etc/consul.d/consul.hcl

# Substitute the default ACL policy.
sed -i -e "s/{ACLs-DEFAULT-POLICY}/${consul_acls_default_policy}/g" /etc/consul.d/consul.hcl

# Set the ACL master token if ACLs are enabled; otherwise blank the placeholder.
if [ "${consul_acls_enabled}" = "true" ]; then
    sed -i -e "s/{CONSUL-TOKEN}/${consul_master_token}/g" /etc/nomad.d/nomad.hcl
    sed -i -e
"s/{CONSUL-TOKEN}/${consul_master_token}/g" /etc/consul.d/consul.hcl 43 | else 44 | sed -i -e "s/{CONSUL-TOKEN}//g" /etc/nomad.d/nomad.hcl 45 | sed -i -e "s/{CONSUL-TOKEN}//g" /etc/consul.d/consul.hcl 46 | fi 47 | 48 | # Update the {PRIVATE-IPV4} ad-hoc template var 49 | IP=$(curl -H "Metadata-Flavor: Google" http://metadata/computeMetadata/v1/instance/network-interfaces/0/ip) 50 | sed -i -e "s/{PRIVATE-IPV4}/$${IP}/g" /etc/consul.d/consul.hcl 51 | 52 | # Enable and start Consul 53 | systemctl enable consul 54 | systemctl start consul 55 | 56 | # NOMAD CONFIGURATION 57 | 58 | # Add the Nomad CA PEM 59 | cat > /tmp/nomad-ca.pem << EOF 60 | ${nomad_ca_cert} 61 | EOF 62 | sudo mv /tmp/nomad-ca.pem /etc/nomad.d/nomad-ca.pem 63 | 64 | # Add the Nomad Server PEM 65 | cat > /tmp/server.pem << EOF 66 | ${nomad_server_cert} 67 | EOF 68 | sudo mv /tmp/server.pem /etc/nomad.d/server.pem 69 | 70 | # Add the Nomad Server Private Key PEM 71 | cat > /tmp/server-key.pem << EOF 72 | ${nomad_server_private_key} 73 | EOF 74 | sudo mv /tmp/server-key.pem /etc/nomad.d/server-key.pem 75 | 76 | # Update the {NUMBER-OF-SERVERS} ad-hoc template var 77 | sed -i -e "s/{NUMBER-OF-SERVERS}/${number_of_servers}/g" /etc/nomad.d/nomad.hcl 78 | 79 | # Update the {GOSSIP-SECRET-KEY} ad-hoc template var 80 | sed -i -e "s/{GOSSIP-KEY}/${nomad_gossip_secret_key}/g" /etc/nomad.d/nomad.hcl 81 | 82 | # Update the {PROJECT-NAME} ad-hoc template var 83 | sed -i -e "s/{PROJECT-NAME}/${project}/g" /etc/nomad.d/nomad.hcl 84 | 85 | # Update the {ACLs-ENABLED} ad-hoc template var 86 | sed -i -e "s/{ACLs-ENABLED}/${nomad_acls_enabled}/g" /etc/nomad.d/nomad.hcl 87 | 88 | # Update the {PRIVATE-IPV4} ad-hoc template var 89 | IP=$(curl -H "Metadata-Flavor: Google" http://metadata/computeMetadata/v1/instance/network-interfaces/0/ip) 90 | sed -i -e "s/{PRIVATE-IPV4}/$${IP}/g" /etc/nomad.d/nomad.hcl 91 | 92 | # Enable and start Nomad 93 | systemctl enable nomad 94 | systemctl start nomad 
-------------------------------------------------------------------------------- /vars.tf: -------------------------------------------------------------------------------- 1 | variable "project" { 2 | type = string 3 | description = "The Google Cloud Platform project to deploy the Nomad cluster to." 4 | } 5 | 6 | variable "credentials" { 7 | type = string 8 | default = "./account.json" 9 | description = "The path to the valid Google Cloud Platform credentials file (in JSON format) to use." 10 | } 11 | 12 | variable "region" { 13 | type = string 14 | default = "us-east1" 15 | description = "The region to deploy to." 16 | } 17 | 18 | variable "zone" { 19 | type = string 20 | default = "c" 21 | description = "The zone to deploy to." 22 | } 23 | 24 | variable "cidr_range" { 25 | type = string 26 | default = "192.168.2.0/24" 27 | description = "The CIDR to deploy with." 28 | } 29 | 30 | variable "server_instances" { 31 | type = number 32 | default = 3 33 | description = "The total number of Nomad servers to deploy (use odd numbers)." 34 | } 35 | 36 | variable "server_machine_type" { 37 | type = string 38 | default = "n1-standard-1" 39 | description = "The VM machine type for Nomad servers." 40 | } 41 | 42 | variable "client_instances" { 43 | type = number 44 | default = 5 45 | description = "The total number of Nomad clients to deploy." 46 | } 47 | 48 | variable "client_machine_type" { 49 | type = string 50 | default = "n1-standard-2" 51 | description = "The VM machine type for Nomad clients." 52 | } 53 | 54 | variable "bastion_enabled" { 55 | type = bool 56 | default = true 57 | description = "Enables the SSH bastion." 58 | } 59 | 60 | variable "bastion_machine_type" { 61 | type = string 62 | default = "g1-small" 63 | description = "The VM machine type for the SSH bastion." 64 | } 65 | 66 | variable "ssh_user" { 67 | type = string 68 | default = "ubuntu" 69 | description = "The user to use for SSH." 
}

variable "tls_organization" {
  type        = string
  default     = "nomad-dev"
  description = "The organization name to use for the TLS certificates."
}

variable "save_ssh_keypair_locally" {
  type        = bool
  default     = false
  description = "If the SSH keypair (bastion.pub, bastion) should be saved locally."
}

variable "nomad_acls_enabled" {
  type        = bool
  default     = true
  description = "If ACLs should be enabled for the Nomad cluster."
}

variable "docker_default_runtime" {
  type        = string
  default     = "runc"
  description = "The default Docker runtime to use."
}

variable "docker_no_new_privileges" {
  type        = bool
  default     = true
  description = "Set no-new-privileges by default for new containers."
}

variable "docker_icc_enabled" {
  type        = bool
  default     = false
  description = "Enables inter-container communication."
}

variable "loadbalancer_enabled" {
  type        = bool
  default     = true
  description = "Enables the GCP load balancer for the Nomad Server API to make the cluster available over the internet."
}

variable "enable_preemptible_bastion_vm" {
  type        = bool
  default     = false
  description = "Enables a preemptible SSH bastion host to save costs."
}

variable "enable_preemptible_server_vms" {
  type        = bool
  default     = false
  description = "Enables preemptible Nomad server hosts to save costs."
}

variable "enable_preemptible_client_vms" {
  type        = bool
  default     = false
  description = "Enables preemptible Nomad client hosts to save costs."
}

variable "enable_shielded_vms" {
  type        = bool
  default     = true
  description = "Enables shielded VMs for all hosts."
}

variable "consul_acls_enabled" {
  type        = bool
  default     = true
  description = "If ACLs should be enabled for the Consul cluster."
}

variable "consul_acls_default_policy" {
  type        = string
  default     = "deny"
  description = "The default policy to use for Consul ACLs (allow/deny)."
}

variable "bucket_location" {
  type        = string
  default     = "US"
  description = "The location to use for the storage bucket."
}

variable "nomad_load_balancer_enabled" {
  type        = bool
  default     = true
  description = "Start a public load balancer to be used to handle ingress Nomad traffic to server nodes within the cluster."
}

variable "dns_enabled" {
  type        = bool
  default     = false
  description = "Enables DNS record management for the cluster."
}

variable "dns_managed_zone_dns_name" {
  // example: nomad.example.com
  type        = string
  default     = ""
  description = "The managed zone DNS name to create cluster records under."
}

variable "dns_record_set_name_prefix" {
  // example: public.$dns_managed_zone_dns_name
  type        = string
  default     = "public"
  description = "The record-set name prefix to use for cluster DNS records."
}

variable "grafana_load_balancer_enabled" {
  type        = bool
  default     = false
  description = "Start a public load balancer to be used to handle ingress Grafana traffic to client nodes within the cluster."
183 | } 184 | 185 | variable "grafana_dns_managed_zone_dns_name" { 186 | // example: grafana.example.com 187 | type = string 188 | default = "" 189 | } 190 | 191 | variable "grafana_dns_record_set_name_prefix" { 192 | // example: public.$dns_managed_zone_dns_name 193 | type = string 194 | default = "public" 195 | } 196 | 197 | variable "tls_validity_period_hours" { 198 | type = number 199 | default = 17520 200 | description = "The total number of hours the generated mTLS certificates are valid for with a default of 2 years" 201 | } -------------------------------------------------------------------------------- /vms.tf: -------------------------------------------------------------------------------- 1 | module "bastion" { 2 | instances = var.bastion_enabled ? 1 : 0 3 | source = "./modules/vm" 4 | name = "nomad-bastion" 5 | machine_type = var.bastion_machine_type 6 | image = format("%s/bastion", var.project) 7 | subnetwork = module.network.subnetwork 8 | region = var.region 9 | zone = var.zone 10 | tags = ["bastion"] 11 | ssh_user = var.ssh_user 12 | ssh_public_key = tls_private_key.ssh_key.public_key_openssh 13 | external_ip = true 14 | enable_preemptible = var.enable_preemptible_bastion_vm 15 | enable_shielded_vm = var.enable_shielded_vms 16 | } 17 | 18 | # Note: Always escape potential forward-slashes in the the base64 output of the gossip key 19 | # if being passed to commands like sed in a startup-script. And this is obviously not perfect 20 | # for many reasons, but is required for the current configuration setup using a cloud-init 21 | # startup script. 

# Nomad/Consul server nodes. The startup script template renders the mTLS
# material, gossip keys, and ACL settings into the baked server image.
module "server" {
  source             = "./modules/vm"
  instances          = var.server_instances
  name               = "server"
  machine_type       = var.server_machine_type
  image              = format("%s/server", var.project)
  subnetwork         = module.network.subnetwork
  region             = var.region
  zone               = var.zone
  tags               = ["server"]
  ssh_user           = var.ssh_user
  ssh_public_key     = tls_private_key.ssh_key.public_key_openssh
  enable_preemptible = var.enable_preemptible_server_vms
  enable_shielded_vm = var.enable_shielded_vms

  metadata_startup_script = templatefile("${path.module}/templates/server.sh", {
    project                    = var.project
    number_of_servers          = var.server_instances
    nomad_ca_cert              = tls_self_signed_cert.nomad-ca.cert_pem
    nomad_server_cert          = tls_locally_signed_cert.nomad-server.cert_pem
    nomad_server_private_key   = tls_private_key.nomad-server.private_key_pem
    nomad_gossip_secret_key    = replace(random_id.nomad-gossip-key.b64_std, "/", "\\/")
    nomad_acls_enabled         = var.nomad_acls_enabled
    consul_gossip_secret_key   = replace(random_id.consul-gossip-key.b64_std, "/", "\\/")
    consul_ca_cert             = tls_self_signed_cert.consul-ca.cert_pem
    consul_server_cert         = tls_locally_signed_cert.consul-server.cert_pem
    consul_server_private_key  = tls_private_key.consul-server.private_key_pem
    consul_acls_enabled        = var.consul_acls_enabled
    consul_acls_default_policy = var.consul_acls_default_policy
    consul_master_token        = random_uuid.consul_master_token.result
  })
}

# Nomad/Consul client nodes. In addition to the mTLS/ACL material, clients
# receive a rendered Docker daemon configuration.
module "client" {
  source             = "./modules/vm"
  instances          = var.client_instances
  name               = "client"
  machine_type       = var.client_machine_type
  image              = format("%s/client", var.project)
  subnetwork         = module.network.subnetwork
  region             = var.region
  zone               = var.zone
  tags               = ["client"]
  ssh_user           = var.ssh_user
  ssh_public_key     = tls_private_key.ssh_key.public_key_openssh
  enable_preemptible = var.enable_preemptible_client_vms
  enable_shielded_vm = var.enable_shielded_vms

  metadata_startup_script = templatefile("${path.module}/templates/client.sh", {
    project                    = var.project
    nomad_ca_cert              = tls_self_signed_cert.nomad-ca.cert_pem
    nomad_client_cert          = tls_locally_signed_cert.nomad-client.cert_pem
    nomad_client_private_key   = tls_private_key.nomad-client.private_key_pem
    nomad_acls_enabled         = var.nomad_acls_enabled
    consul_gossip_secret_key   = replace(random_id.consul-gossip-key.b64_std, "/", "\\/")
    consul_ca_cert             = tls_self_signed_cert.consul-ca.cert_pem
    consul_client_cert         = tls_locally_signed_cert.consul-client.cert_pem
    consul_client_private_key  = tls_private_key.consul-client.private_key_pem
    consul_acls_enabled        = var.consul_acls_enabled
    consul_master_token        = random_uuid.consul_master_token.result
    consul_acls_default_policy = var.consul_acls_default_policy

    docker_config = jsonencode({
      "default-runtime"   = var.docker_default_runtime,
      "no-new-privileges" = var.docker_no_new_privileges,
      "icc"               = var.docker_icc_enabled,
      "runtimes" = {
        "runsc" = {
          "path" = "/usr/local/bin/runsc"
        }
      }
    })
  })
}
--------------------------------------------------------------------------------